1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 327f654740f2a26ad62a5c155af9199af9e69b889claireho* Copyright (C) 2000-2010, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: ucnv2022.c 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2000feb03 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Change history: 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 06/29/2000 helena Major rewrite of the callback APIs. 
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 08/08/2000 Ram Included support for ISO-2022-JP-2 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Changed implementation of toUnicode 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* function 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 08/21/2000 Ram Added support for ISO-2022-KR 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 08/29/2000 Ram Seperated implementation of EBCDIC to 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ucnvebdc.c 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 09/20/2000 Ram Added support for ISO-2022-CN 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Added implementations for getNextUChar() 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* for specific 2022 country variants. 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 10/31/2000 Ram Implemented offsets logic functions 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uset.h" 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_err.h" 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_cb.h" 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_imp.h" 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnvmbcs.h" 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * I am disabling the generic ISO-2022 converter after proposing to do so on 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the icu mailing list two days ago. 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Reasons: 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * its designation sequences, single shifts with return to the previous state, 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * switch-with-no-return to UTF-16BE or similar, etc. 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is unlike the language-specific variants like ISO-2022-JP which 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * require a much smaller repertoire of ISO-2022 features. 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * These variants continue to be supported. 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. 
I believe that no one is really using the generic ISO-2022 converter 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but rather always one of the language-specific variants. 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note that ICU's generic ISO-2022 converter has always output one escape 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sequence followed by UTF-8 for the whole stream. 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 3. Switching between subcharsets is extremely slow, because each time 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the previous converter is closed and a new one opened, 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * without any kind of caching, least-recently-used list, etc. 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4. The code is currently buggy, and given the above it does not seem 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reasonable to spend the time on maintenance. 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings. 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This means, for example, that when ISO-8859-7 is designated, the following 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff. 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The ICU ISO-2022 converter does not handle this - and has no information 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * about which subconverter would have to be shifted vs. which is designed 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for 7-bit ISO-2022. 
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Markus Scherer 2003-dec-03 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char SHIFT_IN_STR[] = "\x0F"; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char SHIFT_OUT_STR[] = "\x0E"; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CR 0x0D 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LF 0x0A 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define H_TAB 0x09 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define V_TAB 0x0B 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SPACE 0x20 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA_START=0xff61, 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA_END=0xff9f 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 94-character sets with native byte values A1..FE are encoded in ISO 2022 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * as bytes 21..7E. (Subtract 0x80.) 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 96-character sets with native byte values A0..FF are encoded in ISO 2022 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * as bytes 20..7F. (Subtract 0x80.) 
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Do not encode C1 control codes with native bytes 80..9F 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * as bytes 00..1F (C0 control codes). 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GR94_START=0xa1, 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GR94_END=0xfe, 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GR96_START=0xa0, 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GR96_END=0xff 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ISO 2022 control codes must not be converted from Unicode 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * because they would mess up the byte stream. 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * corresponding to SO, SI, and ESC. 
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0) 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* for ISO-2022-JP and -CN implementations */ 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum { 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* shared values */ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru INVALID_STATE=-1, 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ASCII = 0, 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SS2_STATE=0x10, 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SS3_STATE, 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JP */ 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO8859_1 = 1 , 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO8859_7 = 2 , 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX201 = 3, 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX208 = 4, 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX212 = 5, 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GB2312 =6, 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru KSC5601 =7, 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */ 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CN */ 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the first few enum constants must keep their values because they correspond to myConverterArray[] */ 
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GB2312_1=1, 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_IR_165=2, 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643=3, 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * these are used in StateEnum and ISO2022State variables, 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but CNS_11643 must be used to index into myConverterArray[] 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_0=0x20, 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1, 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_2, 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_3, 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_4, 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_5, 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_6, 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_7 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} StateEnum; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* is the StateEnum charset value for a DBCS charset? 
*/ 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CSM(cs) ((uint16_t)1<<(cs)) 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note: The converter uses some leniency: 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * all versions, not just JIS7 and JIS8. 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - ICU does not distinguish between different versions of JIS X 0208. 
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 16885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoenum { MAX_JA_VERSION=4 }; 16985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum { 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ASCII1=0, 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru LATIN1, 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SBCS, 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru DBCS, 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MBCS, 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}Cnv2022Type; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct ISO2022State { 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs[4]; /* charset number for SI 
(G0)/SO (G1)/SS2 (G2)/SS3 (G3) */ 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */ 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t prevG; /* g before single shift (SS2 or SS3) */ 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} ISO2022State; 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_OPTIONS_VERSION_MASK 0xf 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_2022_MAX_CONVERTERS 10 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct{ 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *currentConverter; 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Cnv2022Type currentType; 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State toU2022State, fromU2022State; 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t key; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t version; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isFirstBuffer; 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 20585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool isEmptySegment; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char name[30]; 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char locale[3]; 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}UConverterDataISO2022; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Protos */ 
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* ISO-2022 ----------------------------------------------------------------- */ 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*Forward declaration */ 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args, 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode * err); 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args, 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode * err); 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define ESC_2022 0x1B /*ESC*/ 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/ 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/ 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/ 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/ 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} UCNV_TableStates_2022; 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
/*
* The way these state transition arrays work is:
* ex : ESC$B is the sequence for JISX208
*      a) First Iteration: char is ESC
*          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
*             int x = normalize_esq_chars_2022[27] which is equal to 1
*         ii) Search for this value in escSeqStateTable_Key_2022[]
*             value of x is stored at escSeqStateTable_Key_2022[0]
*        iii) Save this index as offset
*         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
*             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
*     b) Switch on this state and continue to next char
*          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
*             which is normalize_esq_chars_2022[36] == 4
*         ii) x is currently 1(from above)
*               x<<=5 -- x is now 32
*               x+=normalize_esq_chars_2022[36]
*               now x is 36
*        iii) Search for this value in escSeqStateTable_Key_2022[]
*             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
*         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
*             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
*     c) Switch on this state and continue to next char
*          i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
*             which is normalize_esq_chars_2022[66] == 9
*         ii) x is currently 36 (from above)
*             x<<=5 -- x is now 1152
*             x+=normalize_esq_chars_2022[66]
*             now x is 1161
*        iii) Search for this value in escSeqStateTable_Key_2022[]
*             value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
*         iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
*             escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
*          v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208
*/


267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*Below are the 3 arrays depicting a state transition table*/ 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t normalize_esq_chars_2022[256] = { 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the generic ISO-2022 converter is completely removed, not just disabled 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * per #ifdef, then the following state table and the associated tables that are 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dimensioned with MAX_STATES_2022 should be trimmed. 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the associated escape sequences starting with ESC ( B should be removed. 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This includes the ones with key values 1097 and all of the ones above 1000000. 
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For the latter, the tables can simply be truncated. 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For the former, since the tables must be kept parallel, it is probably best 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to simply duplicate an adjacent table cell, parallel in all tables. 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * It may make sense to restructure the tables, especially by using small search 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * tables for the variants instead of indexing them parallel to the table here. 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAX_STATES_2022 74 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = { 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644 
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,35947631 ,35947635 ,35947636 ,35947638 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = { 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0 1 2 3 4 5 6 7 8 9 */ 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1" 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8" 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383" 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165" 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" 
,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1" 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = { 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Type def for refactoring changeState_2022 code*/ 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum{ 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022=0, 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022_JP=1, 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022_KR=2, 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022_CN=3 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} Variant2022; 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*********** ISO 2022 Converter Protos ***********/ 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 37485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close(UConverter *converter); 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022Reset(UConverter *converter, UConverterResetChoice choice); 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022getName(const UConverter* cnv); 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err); 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UConverter * 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status); 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
QueruT_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err); 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*const UConverterSharedData _ISO2022Data;*/ 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterSharedData _ISO2022JPData; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterSharedData _ISO2022KRData; 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterSharedData _ISO2022CNData; 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** Converter implementations ******************/ 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The purpose of this function is to get around gcc compiler warnings. 
*/ 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE void 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufromUWriteUInt8(UConverter *cnv, 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *bytes, int32_t length, 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t **target, const char *targetLimit, 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *targetChars = (char *)*target; 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit, 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, sourceIndex, pErrorCode); 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target = (uint8_t*)targetChars; 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE void 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusetInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){ 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->version == 1) { 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = myConverterData->currentConverter; 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0; /* offset */ 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=0; /* state */ 
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; /* byteIndex */ 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE void 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusetInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){ 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* in ISO-2022-KR the designator sequence appears only once 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in a file so we append it only once 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( converter->charErrorBufferLength==0){ 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBufferLength = 4; 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[0] = 0x1b; 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[1] = 0x24; 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[2] = 0x29; 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[3] = 0x43; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->version == 1) { 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = myConverterData->currentConverter; 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=0; 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=1; /* prevLength */ 
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 45285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char myLocale[6]={' ',' ',' ',' ',' ',' '}; 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022)); 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->extraInfo != NULL) { 45885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterNamePieces stackPieces; 45985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t version; 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; 46485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022)); 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentType = ASCII1; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus =FALSE; 46885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->locale){ 46985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_strncpy(myLocale, pArgs->locale, 
sizeof(myLocale)); 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 47185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho version = pArgs->options & UCNV_OPTIONS_VERSION_MASK; 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->version = version; 47327f654740f2a26ad62a5c155af9199af9e69b889claireho 47485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* BEGIN android-changed */ 475b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru /* The "jk" locale ID was made up for KDDI ISO-2022-JP. */ 476b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru /* The "js" locale ID was made up for SoftBank ISO-2022-JP. */ 477b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru if((myLocale[0]=='j' && 478b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru (myLocale[1]=='a'|| myLocale[1]=='p' || myLocale[1]=='k' || 479b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru myLocale[1]=='s') && 480b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru (myLocale[2]=='_' || myLocale[2]=='\0'))) 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru size_t len=0; 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* open the required converters and cache them */ 48485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(version>MAX_JA_VERSION) { 48585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* prevent indexing beyond jpCharsetMasks[] */ 48685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->version = version = 0; 48785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 48985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[ISO8859_7] = 49085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); 
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 492b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru if (myLocale[1]=='k') { /* Use KDDI's version. */ 49385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[JISX208] = 49485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("kddi-jisx-208-2007", &stackPieces, &stackArgs, errorCode); 495b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru } else if (myLocale[1]=='s') { /* Use SoftBank's version. */ 49685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[JISX208] = 49785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("softbank-jisx-208-2007", &stackPieces, &stackArgs, errorCode); 498b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru } else { 49985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[JISX208] = 50085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); 501b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru } 50285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* END android-changed */ 503ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[version]&CSM(JISX212)) { 50585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[JISX212] = 50685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[version]&CSM(GB2312)) { 50985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[GB2312] = 510ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* BEGIN android-changed */ 51127f654740f2a26ad62a5c155af9199af9e69b889claireho 
ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ 512ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* END android-changed */ 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[version]&CSM(KSC5601)) { 51585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[KSC5601] = 51685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the function pointers to appropriate funtions */ 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(myConverterData->locale,"ja"); 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version="); 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = uprv_strlen(myConverterData->name); 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->name[len+1]='\0'; 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (myLocale[2]=='_' || myLocale[2]=='\0')) 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 53185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char *cnvName; 
53285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(version==1) { 53385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnvName="icu-internal-25546"; 53485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 535ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* BEGIN android-changed */ 536e53684e98c4bb65676a4b8b81a50ce4a0304aedcClaire Ho cnvName="ksc_5601"; 537ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* END android-changed */ 53885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->version=version=0; 53985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 54085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->onlyTestIsLoadable) { 54185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carries result */ 54285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(cnv->extraInfo); 54385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->extraInfo=NULL; 54485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 54585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 54685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->currentConverter=ucnv_open(cnvName, errorCode); 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*errorCode)) { 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close(cnv); 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(version==1) { 55385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1"); 55485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4); 55585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->subCharLen = 
myConverterData->currentConverter->subCharLen; 55685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho }else{ 55785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0"); 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* initialize the state variables */ 56185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setInitialStateToUnicodeKR(cnv, myConverterData); 56285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setInitialStateFromUnicodeKR(cnv, myConverterData); 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set the function pointers to appropriate funtions */ 56585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; 56685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_strcpy(myConverterData->locale,"ko"); 56785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&& 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (myLocale[2]=='_' || myLocale[2]=='\0')) 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* open the required converters and cache them */ 574ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* BEGIN android-changed */ 57585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[GB2312_1] = 576e53684e98c4bb65676a4b8b81a50ce4a0304aedcClaire Ho ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); 
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(version==1) { 57885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[ISO_IR_165] = 579e53684e98c4bb65676a4b8b81a50ce4a0304aedcClaire Ho ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode); 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 58185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[CNS_11643] = 582e53684e98c4bb65676a4b8b81a50ce4a0304aedcClaire Ho ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode); 583ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* END android-changed */ 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the function pointers to appropriate funtions */ 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(myConverterData->locale,"cn"); 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 59027f654740f2a26ad62a5c155af9199af9e69b889claireho if (version==0){ 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->version = 0; 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0"); 59327f654740f2a26ad62a5c155af9199af9e69b889claireho }else if (version==1){ 59427f654740f2a26ad62a5c155af9199af9e69b889claireho myConverterData->version = 1; 59527f654740f2a26ad62a5c155af9199af9e69b889claireho (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1"); 59627f654740f2a26ad62a5c155af9199af9e69b889claireho }else { 59727f654740f2a26ad62a5c155af9199af9e69b889claireho myConverterData->version = 2; 59827f654740f2a26ad62a5c155af9199af9e69b889claireho 
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->isFirstBuffer = TRUE; 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* append the UTF-8 escape sequence */ 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength = 3; 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[0] = 0x1b; 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[1] = 0x25; 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[2] = 0x42; 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->sharedData=(UConverterSharedData*)&_ISO2022Data; 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize the state variables */ 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(myConverterData->name,"ISO_2022"); 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *errorCode = U_UNSUPPORTED_ERROR; 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar; 
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 62285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close(cnv); 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *errorCode = U_MEMORY_ALLOCATION_ERROR; 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022Close(UConverter *converter) { 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo); 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData **array = myData->myConverterArray; 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (converter->extraInfo != NULL) { 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*close the array of converter pointers and free the memory*/ 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(array[i]!=NULL) { 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_unloadSharedDataIfReady(array[i]); 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(myData->currentConverter); 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!converter->isExtraLocal){ 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free (converter->extraInfo); 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->extraInfo = NULL; 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) { 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo); 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)); 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->key = 0; 66085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->isEmptySegment = FALSE; 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)); 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->locale[0] == 0){ 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->isFirstBuffer = TRUE; 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->key = 0; 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (converter->mode == UCNV_SO){ 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close (myConverterData->currentConverter); 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter=NULL; 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->mode = UCNV_SI; 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* re-append UTF-8 escape sequence */ 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBufferLength = 3; 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[0] = 0x1b; 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[1] = 0x28; 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[2] = 0x42; 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset the state variables */ 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
if(myConverterData->locale[0] == 'k'){ 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setInitialStateToUnicodeKR(converter, myConverterData); 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setInitialStateFromUnicodeKR(converter, myConverterData); 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022getName(const UConverter* cnv){ 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->extraInfo){ 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo; 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return myData->name; 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** to unicode *******************/ 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**************************************************************************** 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Recognized escape sequences 
are 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>(B ASCII 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>.A ISO-8859-1 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>.F ISO-8859-7 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>(J JISX-201 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>(I JISX-201 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$B JISX-208 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$@ JISX-208 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$(D JISX-212 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$A GB2312 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$(C KSC5601 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 72385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** to unicode *******************/ 73685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE 
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UCNV_TableStates_2022 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetKey_2022(char c,int32_t* key,int32_t* offset){ 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t togo; 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t low = 0; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t hi = MAX_STATES_2022; 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t oldmid=0; 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru togo = normalize_esq_chars_2022[(uint8_t)c]; 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(togo == 0) { 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not a valid character anywhere in an escape sequence */ 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *key = 0; 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offset = 0; 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return INVALID_2022; 
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru togo = (*key << 5) + togo; 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (hi != low) /*binary search*/{ 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru register int32_t mid = (hi+low) >> 1; /*Finds median*/ 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mid == oldmid) 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (escSeqStateTable_Key_2022[mid] > togo){ 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hi = mid; 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if (escSeqStateTable_Key_2022[mid] < togo){ 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru low = mid; 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else /*we found it*/{ 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *key = togo; 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offset = mid; 78185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid]; 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru oldmid = mid; 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*key = 0; 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offset = 0; 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return INVALID_2022; 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*runs through a state machine to determine the escape sequence - codepage correspondance 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruchangeState_2022(UConverter* _this, 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char** source, 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* sourceLimit, 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Variant2022 var, 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_TableStates_2022 value; 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo); 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t key = myData2022->key; 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offset = 0; 80485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t initialToULength = _this->toULength; 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char c; 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = VALID_NON_TERMINAL_2022; 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (*source < sourceLimit) { 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = *(*source)++; 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
_this->toUBytes[_this->toULength++]=(uint8_t)c; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = getKey_2022(c,(int32_t *) &key, &offset); 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (value){ 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case VALID_NON_TERMINAL_2022 : 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with the loop */ 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case VALID_TERMINAL_2022: 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru key = 0; 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto DONE; 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case INVALID_2022: 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto DONE; 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case VALID_MAYBE_TERMINAL_2022: 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ESC ( B is ambiguous only for ISO_2022 itself */ 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(var == ISO_2022) { 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */ 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->toULength = 0; 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* TODO need to indicate that ESC ( B was seen; if failure, then need to 
replay from source or from MBCS-style replay */ 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with the loop */ 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = VALID_NON_TERMINAL_2022; 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not ISO_2022 itself, finish here */ 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = VALID_TERMINAL_2022; 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru key = 0; 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto DONE; 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruDONE: 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->key = key; 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (value == VALID_NON_TERMINAL_2022) { 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* indicate that the escape sequence is incomplete: key!=0 */ 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (value == INVALID_2022 ) { 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* value == VALID_TERMINAL_2022 */ { 
858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(var){ 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_2022: 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *chosenConverterName = escSeqStateTable_Result_2022[offset]; 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(chosenConverterName == NULL) { 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* SS2 or SS3 */ 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 86685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _this->toUCallbackReason = UCNV_UNASSIGNED; 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->mode = UCNV_SI; 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(myData2022->currentConverter); 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err); 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*err)) { 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->mode = UCNV_SO; 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_2022_JP: 
881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 88285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset]; 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(tempState) { 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case INVALID_STATE: 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case SS2_STATE: 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.cs[2]!=0) { 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.g<2) { 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.prevG=myData2022->toU2022State.g; 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.g=2; 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* illegal to have SS2 before a matching designator */ 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case SS3_STATE: not used in ISO-2022-JP-x */ 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_1: 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_7: 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 
903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G2 charset for SS2 */ 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[2]=(int8_t)tempState; 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 charset */ 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[0]=(int8_t)tempState; 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_2022_CN: 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 92185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(tempState) { 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case INVALID_STATE: 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case SS2_STATE: 
927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.cs[2]!=0) { 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.g<2) { 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.prevG=myData2022->toU2022State.g; 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.g=2; 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* illegal to have SS2 before a matching designator */ 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case SS3_STATE: 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.cs[3]!=0) { 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.g<2) { 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.prevG=myData2022->toU2022State.g; 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.g=3; 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* illegal to have SS3 before a matching designator */ 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_IR_165: 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru if(myData2022->version==0) { 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*fall through*/ 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case GB2312_1: 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*fall through*/ 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case CNS_11643_1: 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[1]=(int8_t)tempState; 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case CNS_11643_2: 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[2]=(int8_t)tempState; 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* other CNS 11643 planes */ 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->version==0) { 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[3]=(int8_t)tempState; 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_2022_KR: 
974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offset==0x30){ 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* nothing to be done, just accept this one escape sequence */ 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*err)) { 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->toULength = 0; 98885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { 98985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(_this->toULength>1) { 99085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 99185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ticket 5691: consistent illegal sequences: 99285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - We include at least the first byte (ESC) in the illegal sequence. 99385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - If any of the non-initial bytes could be the start of a character, 99485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * we stop the illegal sequence before the first one of those. 
99585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In escape sequences, all following bytes are "printable", that is, 99685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS), 99785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * they are valid single/lead bytes. 99885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * For simplicity, we always only report the initial ESC byte as the 99985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * illegal sequence and back out all other bytes we looked at. 100085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 100185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Back out some bytes. */ 100285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t backOutDistance=_this->toULength-1; 100385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t bytesFromThisBuffer=_this->toULength-initialToULength; 100485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(backOutDistance<=bytesFromThisBuffer) { 100585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* same as initialToULength<=1 */ 100685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *source-=backOutDistance; 100785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 100885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Back out bytes from the previous buffer: Need to replay them. */ 100985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); 101085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* same as -(initialToULength-1) */ 101185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* preToULength is negative! 
*/ 101285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength); 101385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *source-=bytesFromThisBuffer; 101485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 101585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _this->toULength=1; 101685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 101785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { 101885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _this->toUCallbackReason = UCNV_UNASSIGNED; 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*Checks the characters of the buffer against valid 2022 escape sequences 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*if the match we return a pointer to the initial start of the sequence otherwise 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*we return sourceLimit 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*for 2022 looks ahead in the stream 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *to determine the longest possible convertible 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *data stream 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE const char* 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetEndOfBuffer_2022(const char** source, 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* sourceLimit, 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool flush){ 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* mySource = *source; 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*source >= sourceLimit) 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return sourceLimit; 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do{ 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*mySource == ESC_2022){ 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t i; 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t key = 0; 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offset; 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022; 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Kludge: I could not 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * figure out the reason for validating an escape sequence 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * twice - once here and once in changeState_2022(). 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is it possible to have an ESC character in a ISO2022 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * byte stream which is valid in a code page? Is it legal? 
1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=0; 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022); 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i++) { 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = getKey_2022(*(mySource+i), &key, &offset); 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (value > 0 || *mySource==ESC_2022) 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return mySource; 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return sourceLimit; 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }while (++mySource < sourceLimit); 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return sourceLimit; 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(mySource < sourceLimit && *mySource != ESC_2022) { 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++mySource; 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return mySource; 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 
This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * any future change in _MBCSFromUChar32() function should be reflected here. 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return number of bytes in *value; negative number if fallback; 0 if no mapping 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE int32_t 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruMBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData, 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t* value, 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback, 1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int outputType) 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const int32_t *cx; 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint16_t *table; 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t stage2Entry; 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t myValue; 1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *p; 1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO(markus): Use and require new, faster MBCS conversion table structures. 1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Use internal version of ucnv_open() that verifies that the new structures are available, 1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * else U_INTERNAL_PROGRAM_ERROR. 
1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru table=sharedData->mbcs.fromUnicodeTable; 1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the bytes and the length for the output */ 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(outputType==MBCS_OUTPUT_2){ 1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myValue<=0xff) { 1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=1; 1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* outputType==MBCS_OUTPUT_3 */ { 1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myValue<=0xff) { 1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=1; 1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(myValue<=0xffff) { 1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 
1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* is this code point assigned, or do we use fallbacks? */ 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((stage2Entry&(1<<(16+(c&0xf))))!=0) { 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* assigned */ 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *value=myValue; 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return length; 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) { 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We allow a 0 byte output if the "assigned" bit is set for this entry. 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * There is no way with this data structure for fallback output 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to be a zero byte. 
1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *value=myValue; 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -length; 1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cx=sharedData->mbcs.extIndexes; 1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cx!=NULL) { 1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ucnv_extSimpleMatchFromU(cx, c, value, useFallback); 1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unassigned */ 1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c 1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * any future change in _MBCSSingleFromUChar32() function should be reflected here. 
1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param retval pointer to output byte 1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return 1 roundtrip byte 0 no mapping -1 fallback byte 1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE int32_t 1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruMBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData, 1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, 1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t* retval, 1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback) 1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint16_t *table; 1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t value; 1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ 1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru table=sharedData->mbcs.fromUnicodeTable; 1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the byte for the output */ 1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); 1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* is this code point 
assigned, or do we use fallbacks? */ 1170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *retval=(uint32_t)(value&0xff); 1171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value>=0xf00) { 1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 1; /* roundtrip */ 1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(useFallback ? value>=0x800 : value>=0xc00) { 1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; /* fallback taken */ 1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* no mapping */ 1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1180c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru/* 1181c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Check that the result is a 2-byte value with each byte in the range A1..FE 1182c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte 1183c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * to move it to the ISO 2022 range 21..7E. 1184c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Return 0 if out of range. 
1185c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 1186c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Querustatic U_INLINE uint32_t 1187c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru_2022FromGR94DBCS(uint32_t value) { 1188c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && 1189c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) 1190c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ) { 1191c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru return value - 0x8080; /* shift down to 21..7e byte range */ 1192c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else { 1193c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru return 0; /* not valid for ISO 2022 */ 1194c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1195c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru} 1196c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 119785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */ 119885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 119985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the 120085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point 120185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * unchanged. 
120285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 120385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE uint32_t 120485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_2022ToGR94DBCS(uint32_t value) { 120585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t returnValue = value + 0x8080; 120685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) && 120785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) { 120885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return returnValue; 120985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 121085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return value; 121185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 121285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 121385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 121485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/********************************************************************************** 1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ISO-2022 Converter 1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 1220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruT_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, 1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* mySourceLimit, 
*realSourceLimit; 1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* sourceStart; 1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* myTargetStart; 1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter* saveThis; 1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData; 1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t length; 1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveThis = args->converter; 1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData=((UConverterDataISO2022*)(saveThis->extraInfo)); 1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru realSourceLimit = args->sourceLimit; 1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (args->source < realSourceLimit) { 1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key == 0) { /* are we in the middle of an escape sequence? 
*/ 1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ 1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush); 1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->source < mySourceLimit) { 1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->currentConverter==NULL) { 1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->currentConverter = ucnv_open("ASCII",err); 1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; 1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveThis->mode = UCNV_SO; 1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert to before the ESC or until the end of the buffer */ 1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->isFirstBuffer=FALSE; 1255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceStart = args->source; 1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myTargetStart = args->target; 1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter = myData->currentConverter; 1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode(args->converter, 1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &args->target, 
1260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->targetLimit, 1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &args->source, 1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceLimit, 1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets, 1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UBool)(args->flush && mySourceLimit == realSourceLimit), 1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 1266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter = saveThis; 1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*err == U_BUFFER_OVERFLOW_ERROR) { 1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move the overflow buffer */ 1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength; 1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->currentConverter->UCharErrorBufferLength = 0; 1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length > 0) { 1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(saveThis->UCharErrorBuffer, 1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->currentConverter->UCharErrorBuffer, 1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length*U_SIZEOF_UCHAR); 1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At least one of: 1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * -Error while 
converting 1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * -Done with entire buffer 1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * -Need to write offsets or update the current offset 1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (leave that up to the code in ucnv.c) 1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or else we just stopped at an ESC byte and continue with changeState_2022() 1288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*err) || 1290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (args->source == realSourceLimit) || 1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) || 1292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0)) 1293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy partial or error input for truncated detection and error handling */ 1295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)) { 1296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength; 1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length > 0) { 1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length); 1299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = saveThis->toULength = 
myData->currentConverter->toULength; 1302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length > 0) { 1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length); 1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->source < mySourceLimit) { 1305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */ 1306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceStart = args->source; 1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru changeState_2022(args->converter, 1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &(args->source), 1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru realSourceLimit, 1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022, 1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) { 1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* let the ucnv.c code update its current offset */ 1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
1325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * To Unicode Callback helper function 1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerutoUnicodeCallback(UConverter *cnv, 1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t sourceChar, const uint32_t targetUniChar, 1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar>0xff){ 1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0] = (uint8_t)(sourceChar>>8); 1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1] = (uint8_t)sourceChar; 1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = 2; 1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0] =(char) sourceChar; 1343c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru cnv->toULength = 1; 1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){ 1347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 1350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 1351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**************************************ISO-2022-JP*************************************************/ 1355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************************************** IMPORTANT ************************************************** 1357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and 1358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32(). 1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* The converter iterates over each Unicode codepoint 1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is 1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* processed one char at a time it would make sense to reduce the extra processing a canned converter 1362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* would do as far as possible. 1363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 1364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* If the implementation of these macros or structure of sharedData struct change in the future, make 1365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* sure that ISO-2022 is also changed. 
1366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*************************************************************************************************** 1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************************************************************************************************** 1370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Rules for ISO-2022-jp encoding 1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (i) Escape sequences must be fully contained within a line they should not 1372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* span new lines or CRs 1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (ii) If the last character on a line is represented by two bytes then an ASCII or 1374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* JIS-Roman character escape sequence should follow before the line terminates 1375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (iii) If the first character on the line is represented by two bytes then a two 1376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* byte character escape sequence should precede it 1377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (iv) If no escape sequence is encountered then the characters are ASCII 1378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2, 1379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and invoked with SS2 (ESC N). 
* (vi)  If there is any G0 designation in text, there must be a switch to
*       ASCII or to JIS X 0201-Roman before a space character (but not
*       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
*       characters such as tab or CRLF.
* (vii) Supported encodings:
*          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
*
*       source : RFC-1554
*
*          JISX201, JISX208,JISX212 : new .cnv data files created
*          KSC5601 : alias to ibm-949 mapping table
*          GB2312 : alias to ibm-1386 mapping table
*          ISO-8859-1 : algorithmically implemented as LATIN1 case
*          ISO-8859-7 : alias to ibm-9409 mapping table
*/

/*
 * Preference order of JP charsets: the order in which charsets that are
 * not currently designated are tried when converting from Unicode.
 */
static const StateEnum jpCharsetPref[]={
    ASCII,
    JISX201,
    ISO8859_1,
    ISO8859_7,
    JISX208,
    JISX212,
    GB2312,
    KSC5601,
    HWKANA_7BIT
};

/*
 * Escape sequence that designates each charset.
 * The escape sequences must be in order of the enum constants like JISX201  = 3,
 * not in order of jpCharsetPref[]!
 */
static const char escSeqChars[][6] ={
    "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
    "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
    "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
    "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
    "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
    "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
    "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
    "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
    "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */

};
/*
 * Number of bytes in each escSeqChars[] entry, listed in the same order
 * as escSeqChars[].
 */
static  const int8_t escSeqCharsLen[] ={
    3, /* length of <ESC>(B  ASCII       */
    3, /* length of <ESC>.A  ISO-8859-1  */
    3, /* length of <ESC>.F  ISO-8859-7  */
    3, /* length of <ESC>(J  JISX-201    */
    3, /* length of <ESC>$B  JISX-208    */
    4, /* length of <ESC>$(D JISX-212    */
    3, /* length of <ESC>$A  GB2312      */
    4, /* length of <ESC>$(C KSC5601     */
    3  /* length of <ESC>(I  HWKANA_7BIT */
};

/*
* The iteration over various code pages works this way:
* i)   Get the currentState from myConverterData->currentState
* ii)  Check if the character is mapped to a valid character in the currentState
*      Yes ->  a) set the initIterState to currentState
*              b) remain in this state until an invalid character is found
*      No  ->  a) go to the next code page and find the character
* iii) Before changing the state (incrementing the current state), check if the
*      current state is equal to the initIterState
*      Yes ->  The character cannot be represented in any of the supported encodings;
*              break and return a U_INVALID_CHARACTER error
*      No  ->  Continue and find the character in the next code page
*
*
* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
*/

/* Map 00..7F to Unicode according to JIS X 0201.
/*
 * Convert a 7-bit JIS X 0201 byte value (00..7F) to its Unicode code point.
 * Only two positions differ from the identity mapping:
 *   0x5C -> U+00A5 and 0x7E -> U+203E.
 * Any other value, including one above 0x7F, is returned unchanged;
 * no range check is performed here.
 */
static U_INLINE uint32_t
jisx201ToU(uint32_t value) {
    switch(value) {
    case 0x5c:
        return 0xa5;
    case 0x7e:
        return 0x203e;
    default:
        /* identity for everything else */
        return value;
    }
}

/*
 * Convert a Unicode code point to a 7-bit JIS X 0201 byte value (00..7F).
 * U+00A5 and U+203E map to 0x5C and 0x7E; U+005C and U+007E have no
 * mapping because those byte positions are taken by the two code points
 * above. All other code points up to U+007F map to themselves.
 * Returns 0xFFFE for anything that cannot be mapped.
 */
static U_INLINE uint32_t
jisx201FromU(uint32_t value) {
    switch(value) {
    case 0xa5:
        return 0x5c;
    case 0x203e:
        return 0x7e;
    case 0x5c:
    case 0x7e:
        /* these two positions are occupied, so the ASCII characters are unmappable */
        return 0xfffe;
    default:
        return (value <= 0x7f) ? value : 0xfffe;
    }
}

/*
 * Convert a valid Shift-JIS byte pair (lead byte in bits 15..8, trail byte
 * in bits 7..0) to the corresponding pair of JIS X 0208 bytes, packed the
 * same way.
 * Returns 0 if the pair lies above 0xEFFC, i.e. beyond the range that
 * corresponds to JIS X 0208. The input is otherwise not validated.
 */
static U_INLINE uint32_t
_2022FromSJIS(uint32_t value) {
    uint32_t lead, result;
    uint8_t low;

    if(value > 0xEFFC) {
        return 0;  /* beyond JIS X 0208 */
    }

    low = (uint8_t)value;       /* trail byte */
    lead = value & 0xff00;      /* lead byte, still in the high position */

    /* lead bytes up to 0x9f use one offset, the rest (0xe0..0xef) another */
    lead -= (lead <= 0x9f00) ? 0x7000 : 0xb000;

    /* each Shift-JIS lead byte covers two JIS X 0208 rows */
    result = lead << 1;

    if(low > 0x9e) {
        /* trail <= 0xfc: second row of the pair */
        return result | (uint32_t)(low - 0x7e);
    }

    /* first row of the pair; trail bytes skip 0x7f */
    result -= 0x100;
    return result | (uint32_t)(low - ((low <= 0x7e) ? 0x1f : 0x20));
}

/*
 * Convert a pair of JIS X 0208 bytes (c1 = first byte, c2 = second byte,
 * each expected in 21..7E) to a Shift-JIS byte pair stored in bytes[0..1].
 * Out-of-range input is turned into a 0 byte where it is detected here;
 * some invalid values are not tested explicitly because they already yield
 * byte values that are equally invalid in Shift-JIS.
 */
static U_INLINE void
_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
    uint8_t lead, trail;

    if((c1 & 1) == 0) {
        /* even first byte: second half of the Shift-JIS trail byte range */
        lead = c1;
        if((uint8_t)(c2 - 0x21) <= ((0x7e) - 0x21)) {
            trail = (uint8_t)(c2 + 0x7e);
        } else {
            trail = 0;  /* invalid */
        }
    } else {
        /* odd first byte: round up so that the halving below pairs the rows */
        lead = (uint8_t)(c1 + 1);
        if(c2 <= 0x5f) {
            trail = (uint8_t)(c2 + 0x1f);
        } else {
            /* trail bytes skip 0x7f, hence the larger offset */
            trail = (c2 <= 0x7e) ? (uint8_t)(c2 + 0x20) : 0;
        }
    }

    /* two JIS X 0208 rows share one Shift-JIS lead byte */
    lead >>= 1;
    if(lead <= 0x2f) {
        lead = (uint8_t)(lead + 0x70);
    } else if(lead <= 0x3f) {
        lead = (uint8_t)(lead + 0xb0);
    } else {
        lead = 0;  /* invalid */
    }

    bytes[0] = (char)lead;
    bytes[1] = (char)trail;
}

1562c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 1563c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Querustatic const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = { 1564c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2123, /* U+FF61 */ 1565c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2156, 1566c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2157, 1567c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2122, 1568c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2126, 1569c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2572, 1570c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2521, 1571c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2523, 1572c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2525, 1573c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2527, 1574c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2529, 1575c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2563, 1576c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2565, 1577c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2567, 1578c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2543, 1579c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x213C, /* U+FF70 */ 1580c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2522, 1581c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2524, 1582c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2526, 1583c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2528, 1584c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x252A, 1585c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x252B, 1586c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x252D, 1587c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x252F, 
1588c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2531, 1589c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2533, 1590c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2535, 1591c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2537, 1592c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2539, 1593c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x253B, 1594c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x253D, 1595c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x253F, /* U+FF80 */ 1596c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2541, 1597c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2544, 1598c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2546, 1599c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2548, 1600c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254A, 1601c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254B, 1602c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254C, 1603c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254D, 1604c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254E, 1605c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254F, 1606c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2552, 1607c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2555, 1608c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2558, 1609c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x255B, 1610c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x255E, 1611c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x255F, /* U+FF90 */ 1612c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2560, 1613c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2561, 1614c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2562, 
1615c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2564, 1616c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2566, 1617c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2568, 1618c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2569, 1619c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256A, 1620c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256B, 1621c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256C, 1622c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256D, 1623c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256F, 1624c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2573, 1625c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x212B, 1626c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x212C /* U+FF9F */ 1627c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru}; 1628c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 1629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) { 1631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 1632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *converterData; 1633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pFromU2022State; 1634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target = (uint8_t *) args->target; 1635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; 1636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* source = args->source; 1637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* sourceLimit = args->sourceLimit; 
1638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t* offsets = args->offsets; 1639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 sourceChar; 1640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buffer[8]; 1641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len, outLen; 1642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t choices[10]; 1643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t choiceCount; 1644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetValue = 0; 1645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback; 1646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 1648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs, g; 1649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the state */ 1651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData = (UConverterDataISO2022*)cnv->extraInfo; 1652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State = &converterData->fromU2022State; 1653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 1655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check if the last codepoint of previous buffer was a lead surrogate*/ 1657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { 1658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 1659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source < sourceLimit) { 1662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < targetLimit) { 1663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar = *(source++); 1665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*check if the char is a First surrogate*/ 1666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE(sourceChar)) { 1667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(sourceChar)) { 1668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 1669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*look ahead to find the trail surrogate*/ 1670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source < sourceLimit) { 1671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=(UChar) *source; 1673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SECOND_SURROGATE(trail)) { 1674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source++; 1675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail); 1676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=0x00; 1677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this supplementary code point */ 1678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 
1682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 1683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 1695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not convert SO/SI/ESC */ 1701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_2022_CONTROL(sourceChar)) { 1702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 1704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 
1706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do the conversion */ 1709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choiceCount == 0) { 1711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t csm; 1712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The csm variable keeps track of which charsets are allowed 1715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and not used yet while building the choices[]. 1716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm = jpCharsetMasks[converterData->version]; 1718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 1719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS7/8: try single-byte half-width Katakana before JISX208 */ 1721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(converterData->version == 3 || converterData->version == 4) { 1722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[choiceCount++] = (int8_t)HWKANA_7BIT; 1723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Do not try single-byte half-width Katakana for other versions. 
*/ 1725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm &= ~CSM(HWKANA_7BIT); 1726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try the current G0 charset */ 1728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[choiceCount++] = cs = pFromU2022State->cs[0]; 1729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm &= ~CSM(cs); 1730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try the current G2 charset */ 1732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((cs = pFromU2022State->cs[2]) != 0) { 1733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[choiceCount++] = cs; 1734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm &= ~CSM(cs); 1735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try all the other possible charsets */ 1738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) { 1739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (int8_t)jpCharsetPref[i]; 1740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(CSM(cs) & csm) { 1741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[choiceCount++] = cs; 1742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm &= ~CSM(cs); 1743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = g = 0; 1748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 
1749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len==0: no mapping found yet 1750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks 1751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len>0: found a roundtrip result, done 1752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 0; 1754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We will turn off useFallback after finding a fallback, 1756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but we still get fallbacks from PUA code points as usual. 1757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Therefore, we will also need to check that we don't overwrite 1758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * an early fallback with a later one. 
1759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = cnv->useFallback; 1761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < choiceCount && len <= 0; ++i) { 1763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 1764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len2; 1765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs0 = choices[i]; 1766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(cs0) { 1767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ASCII: 1768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar <= 0x7f) { 1769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = (uint32_t)sourceChar; 1770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 1771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 0; 1773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_1: 1776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(GR96_START <= sourceChar && sourceChar <= GR96_END) { 1777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = (uint32_t)sourceChar - 0x80; 1778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 1779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 2; 1781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 
HWKANA_7BIT: 1784c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { 1785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(converterData->version==3) { 1786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS7: use G1 (SO) */ 1787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Shift U+FF61..U+FF9F to bytes 21..5F. */ 1788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); 1789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 1790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ 1791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 1; 1792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(converterData->version==4) { 1793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */ 1794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Shift U+FF61..U+FF9F to bytes A1..DF. 
*/ 1795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1)); 1796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 1797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = pFromU2022State->cs[0]; 1799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_JP_DBCS(cs)) { 1800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* switch from a DBCS charset to JISX201 */ 1801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (int8_t)JISX201; 1802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* else stay in the current G0 charset */ 1804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 0; 1805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* else do not use HWKANA_7BIT with other versions */ 1807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case JISX201: 1810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 SBCS */ 1811c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru value = jisx201FromU(sourceChar); 1812c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(value <= 0x7f) { 1813c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru targetValue = value; 1814c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru len = 1; 1815c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru cs = cs0; 1816c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru g = 0; 1817c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru useFallback = FALSE; 1818c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 
1819c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1820c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case JISX208: 1821c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* G0 DBCS from Shift-JIS table */ 1822c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru len2 = MBCS_FROM_UCHAR32_ISO2022( 1823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[cs0], 1824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, &value, 1825c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru useFallback, MBCS_OUTPUT_2); 1826c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1827c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru value = _2022FromSJIS(value); 1828c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(value != 0) { 1829c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru targetValue = value; 1830c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru len = len2; 1831c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru cs = cs0; 1832c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru g = 0; 1833c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru useFallback = FALSE; 1834c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1835c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if(len == 0 && useFallback && 1836c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { 1837c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru targetValue = hwkana_fb[sourceChar - HWKANA_START]; 1838c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru len = -2; 1839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 0; 
1841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 1842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_7: 1845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 SBCS forced to 7-bit output */ 1846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len2 = MBCS_SINGLE_FROM_UCHAR32( 1847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[cs0], 1848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, &value, 1849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback); 1850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) { 1851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = value - 0x80; 1852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = len2; 1853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 2; 1855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 1856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 DBCS */ 1860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len2 = MBCS_FROM_UCHAR32_ISO2022( 1861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[cs0], 1862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, &value, 1863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, MBCS_OUTPUT_2); 
1864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs0 == KSC5601) { 1866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Check for valid bytes for the encoding scheme. 1868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is necessary because the sub-converter (windows-949) 1869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * has a broader encoding scheme than is valid for 2022. 1870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1871c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru value = _2022FromGR94DBCS(value); 1872c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(value == 0) { 1873c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = value; 1877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = len2; 1878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 0; 1880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 1881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len != 0) { 1887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len < 0) { 
1888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = -len; /* fallback */ 1889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen = 0; /* count output bytes */ 1891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write SI if necessary (only for JIS7) */ 1893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g == 1 && g == 0) { 1894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = UCNV_SI; 1895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 1896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the designation sequence if necessary */ 1899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs != pFromU2022State->cs[g]) { 1900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t escLen = escSeqCharsLen[cs]; 1901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen); 1902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen += escLen; 1903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[g] = cs; 1904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* invalidate the choices[] */ 1906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 1907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the shift sequence if necessary */ 1910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(g != pFromU2022State->g) 
{ 1911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(g) { 1912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case 0 handled before writing escapes */ 1913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = UCNV_SO; 1915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 1; 1916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: /* case 2 */ 1918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = 0x1b; 1919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = 0x4e; 1920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no case 3: no SS3 in ISO-2022-JP-x */ 1922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the output bytes */ 1926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len == 1) { 1927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = (char)targetValue; 1928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* len == 2 */ { 1929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = (char)(targetValue >> 8); 1930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = (char)targetValue; 1931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if we cannot find the character 
after checking all codepages 1935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * then this is an error 1936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 1938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar == CR || sourceChar == LF) { 1943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */ 1944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[2] = 0; 1945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 1946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output outLen>0 bytes in buffer[] */ 1949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(outLen == 1) { 1950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[0]; 1951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) { 1952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */ 1953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(outLen == 2 && (target + 2) <= targetLimit) { 1955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[0]; 1956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[1]; 
1957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) { 1958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)); 1959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = sourceIndex; 1960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = sourceIndex; 1961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 1964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 1965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, outLen, 1966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 1967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)), 1968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 1969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)) { 1970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* end if(myTargetIndex<myTargetLength) */ 1974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 1975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 1976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }/* end while(mySourceIndex<mySourceLength) */ 1980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the end of the input stream and detection of truncated input 1983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are handled by the framework, but for ISO-2022-JP conversion 1984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to be in ASCII mode at the very end 1985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conditions: 1987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * successful 1988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in SO mode or not in ASCII mode 1989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of input and no truncated input 1990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U_SUCCESS(*err) && 1992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) && 1993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->flush && source>=sourceLimit && cnv->fromUChar32==0 1994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex; 1996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen = 0; 1998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g != 0) { 2000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = UCNV_SI; 2001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 2002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
2004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->cs[0] != ASCII) { 2005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t escLen = escSeqCharsLen[ASCII]; 2006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen); 2007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen += escLen; 2008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[0] = (int8_t)ASCII; 2009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the source index of the last input character */ 2012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO this would be simpler and more reliable if we used a pair 2014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of sourceIndex/prevSourceIndex like in ucnvmbcs.c 2015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so that we could simply use the prevSourceIndex here; 2016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this code gives an incorrect result for the rare case of an unmatched 2017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trail surrogate that is alone in the last buffer of the text stream 2018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=(int32_t)(source-args->source); 2020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceIndex>0) { 2021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 2022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U16_IS_TRAIL(args->source[sourceIndex]) && 2023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (sourceIndex==0 || 
U16_IS_LEAD(args->source[sourceIndex-1])) 2024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 2026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=-1; 2029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 2032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 2033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, outLen, 2034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 2035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 2036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 2037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*save the state and return */ 2040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = source; 2041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = (char*)target; 2042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** to unicode *******************/ 2045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, 2048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
UErrorCode* err){ 2049c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru char tempBuf[2]; 2050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySource = (char *) args->source; 2051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *myTarget = args->target; 2052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySourceLimit = args->sourceLimit; 2053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetUniChar = 0x0000; 2054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t mySourceChar = 0x0000; 205585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t tmpSourceChar = 0x0000; 2056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData; 2057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pToU2022State; 2058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru StateEnum cs; 2059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData=(UConverterDataISO2022*)(args->converter->extraInfo); 2061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State = &myData->toU2022State; 2062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key != 0) { 2064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial escape sequence */ 2065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto escape; 2066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { 2067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial double-byte character */ 2068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar = args->converter->toUBytes[0]; 
2069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 0; 2070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; 207185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = missingCharMarker; 2072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailByte; 2073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(mySource < mySourceLimit){ 2076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar =missingCharMarker; 2078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget < args->targetLimit){ 2080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar= (unsigned char) *mySource++; 2082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(mySourceChar) { 2084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UCNV_SI: 2085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->version==3) { 2086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=0; 2087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ 209085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* reset this, we have a different error */ 2091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 
2092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UCNV_SO: 2095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->version==3) { 2096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS7: switch to G1 half-width Katakana */ 2097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->cs[1] = (int8_t)HWKANA_7BIT; 2098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=1; 2099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ 210285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* reset this, we have a different error */ 2103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ESC_2022: 2107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySource--; 2108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruescape: 210985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho { 211085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char * mySourceBefore = mySource; 211185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t toULengthBefore = args->converter->toULength; 211285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 211385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho changeState_2022(args->converter,&(mySource), 211485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceLimit, ISO_2022_JP,err); 211585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 211685bf2e2fbc60a9f938064abc8127d61da7d19882Claire 
Ho /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */ 211785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { 211885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *err = U_ILLEGAL_ESCAPE_SEQUENCE; 211985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUCallbackReason = UCNV_IRREGULAR; 212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); 212185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 212285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 2123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* invalid or illegal escape sequence */ 2125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 2126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 2127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 212885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ 2129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 213185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* If we successfully completed an escape sequence, we begin a new segment, empty so far */ 213285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(myData->key==0) { 213385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = TRUE; 213485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 2135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-JP does not use
single-byte (C1) SS2 and SS3 */ 2138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case CR: 2140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*falls through*/ 2141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case LF: 2142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* automatically reset to single-byte mode */ 2143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) { 2144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->cs[0] = (int8_t)ASCII; 2145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->cs[2] = 0; 2147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g = 0; 2148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* falls through */ 2149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 2150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert one or two bytes */ 215185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; 2152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; 2153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 && 2154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !IS_JP_DBCS(cs) 2155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */ 2157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = mySourceChar + (HWKANA_START - 0xa1); 2158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
2159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return from a single-shift state to the previous one */ 2160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pToU2022State->g >= 2) { 2161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=pToU2022State->prevG; 2162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else switch(cs) { 2164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ASCII: 2165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 2166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = mySourceChar; 2167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_1: 2170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 2171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = mySourceChar + 0x80; 2172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return from a single-shift state to the previous one */ 2174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=pToU2022State->prevG; 2175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_7: 2177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 2178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert mySourceChar+0x80 to use a normal 8-bit table */ 2179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = 2180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( 
2181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->myConverterArray[cs], 2182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar + 0x80); 2183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return from a single-shift state to the previous one */ 2185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=pToU2022State->prevG; 2186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case JISX201: 2188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 2189c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru targetUniChar = jisx201ToU(mySourceChar); 2190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case HWKANA_7BIT: 2193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { 2194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 7-bit halfwidth Katakana */ 2195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = mySourceChar + (HWKANA_START - 0x21); 2196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 2199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 DBCS */ 2200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySource < mySourceLimit) { 220185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int leadIsOk, trailIsOk; 220285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint8_t trailByte; 2203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailByte: 
220485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailByte = (uint8_t)*mySource; 220585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 220685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ticket 5691: consistent illegal sequences: 220785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - We include at least the first byte in the illegal sequence. 220885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - If any of the non-initial bytes could be the start of a character, 220985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * we stop the illegal sequence before the first one of those. 221085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 221185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is 221285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * an ESC/SO/SI, we report only the first byte as the illegal sequence. 221385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Otherwise we convert or report the pair of bytes. 
221485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 221585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); 221685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); 221785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (leadIsOk && trailIsOk) { 221885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 221985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tmpSourceChar = (mySourceChar << 8) | trailByte; 222085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(cs == JISX208) { 222185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf); 222285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = tmpSourceChar; 222385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 222485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. 
*/ 222585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = tmpSourceChar; 222685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (cs == KSC5601) { 222785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ 222885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 222985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[0] = (char)(tmpSourceChar >> 8); 223085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[1] = (char)(tmpSourceChar); 223185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 223285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); 223385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 223485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 223585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 223685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* add another bit so that the code below writes 2 bytes in case of error */ 223785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; 2238c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 2239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toUBytes[0] = (uint8_t)mySourceChar; 2241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 1; 2242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 2243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* End of inner switch */ 2245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* 
End of outer switch */ 2247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ 2248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 2249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 2250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++)=(UChar)targetUniChar; 2252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if(targetUniChar > missingCharMarker){ 2254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* disassemble the surrogate pair and write to output*/ 2255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar-=0x0010000; 2256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10)); 2257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 2258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 2259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++myTarget; 2261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget< args->targetLimit){ 2262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); 2263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 2264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 
1 : 2)); 2265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++myTarget; 2267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= 2269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); 2270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 2274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Call the callback function*/ 2275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); 2276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */ 2280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 2281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 2285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 2286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 2287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
2289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************************************************************** 2291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Rules for ISO-2022-KR encoding 2292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* i) The KSC5601 designator sequence should appear only once in a file, 2293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* at the begining of a line before any KSC5601 characters. This usually 2294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* means that it appears by itself on the first line of the file 2295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ii) There are only 2 shifting sequences SO to shift into double byte mode 2296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and SI to shift into single byte mode 2297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 2298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter* saveConv = args->converter; 2302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo; 2303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter=myConverterData->currentConverter; 2304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32; 2306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_MBCSFromUnicodeWithOffsets(args,err); 
2307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32; 2308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*err == U_BUFFER_OVERFLOW_ERROR) { 2310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->currentConverter->charErrorBufferLength > 0) { 2311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy( 2312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveConv->charErrorBuffer, 2313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBuffer, 2314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBufferLength); 2315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength; 2317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBufferLength = 0; 2318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter=saveConv; 2320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source = args->source; 2326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *sourceLimit = args->sourceLimit; 
2327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned char *target = (unsigned char *) args->target; 2328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned char *targetLimit = (unsigned char *) args->targetLimit; 2329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t* offsets = args->offsets; 2330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetByteUnit = 0x0000; 2331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 sourceChar = 0x0000; 2332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isTargetByteDBCS; 2333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool oldIsTargetByteDBCS; 2334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *converterData; 2335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData* sharedData; 2336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback; 2337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length =0; 2338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData=(UConverterDataISO2022*)args->converter->extraInfo; 2340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* if the version is 1 then the user is requesting 2341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conversion with ibm-25546 pass the arguments to 2342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MBCS converter and return 2343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(converterData->version==1){ 2345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err); 2346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } 2348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize data */ 2350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sharedData = converterData->currentConverter->sharedData; 2351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = args->converter->useFallback; 2352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus; 2353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru oldIsTargetByteDBCS = isTargetByteDBCS; 2354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus; 2356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) { 2357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 2358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source < sourceLimit){ 2360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetByteUnit = missingCharMarker; 2362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < (unsigned char*) args->targetLimit){ 2364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar = *source++; 2365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not convert SO/SI/ESC */ 2367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_2022_CONTROL(sourceChar)) { 2368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru *err=U_ILLEGAL_CHAR_FOUND; 2370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUChar32=sourceChar; 2371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2); 2375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length < 0) { 2376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = -length; /* fallback */ 2377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only DBCS or SBCS characters are expected*/ 2379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* DB characters with high bit set to 1 are expected */ 238085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( length > 2 || length==0 || 238185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (length == 1 && targetByteUnit > 0x7f) || 238285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (length == 2 && 238385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) || 238485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1))) 238585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ) { 2386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetByteUnit=missingCharMarker; 2387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (targetByteUnit != missingCharMarker){ 2389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru oldIsTargetByteDBCS = isTargetByteDBCS; 2391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF); 2392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* append the shift sequence */ 2393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (oldIsTargetByteDBCS != isTargetByteDBCS ){ 2394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (isTargetByteDBCS) 2396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = UCNV_SO; 2397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 2398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = UCNV_SI; 2399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) 2400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(offsets++) = (int32_t)(source - args->source-1); 2401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the targetUniChar to target */ 2403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetByteUnit <= 0x00FF){ 2404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( target < targetLimit){ 2405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(target++) = (unsigned char) targetByteUnit; 2406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets){ 2407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(offsets++) = (int32_t)(source - args->source-1); 2408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit); 2412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 
2413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < targetLimit){ 2416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80); 2417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets){ 2418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(offsets++) = (int32_t)(source - args->source-1); 2419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < targetLimit){ 2421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(target++) =(unsigned char) (targetByteUnit -0x80); 2422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets){ 2423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(offsets++) = (int32_t)(source - args->source-1); 2424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80); 2427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 2428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80); 2431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80); 2432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 
2433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 2438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* oops.. the code point is unassingned 2439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * set the error and reason 2440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*check if the char is a First surrogate*/ 2443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE(sourceChar)) { 2444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(sourceChar)) { 2445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 2446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*look ahead to find the trail surrogate*/ 2447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source < sourceLimit) { 2448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 2449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=(UChar) *source; 2450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SECOND_SURROGATE(trail)) { 2451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source++; 2452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail); 2453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 2454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 2455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 
2456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 2458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 2460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 2463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ZERO_ERROR; 2464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 2467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 2469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(unassigned) for a BMP code point */ 2472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 2473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUChar32=sourceChar; 2476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* end if(myTargetIndex<myTargetLength) */ 2479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 
2480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 2481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }/* end while(mySourceIndex<mySourceLength) */ 2485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the end of the input stream and detection of truncated input 2488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are handled by the framework, but for ISO-2022-KR conversion 2489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to be in ASCII mode at the very end 2490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conditions: 2492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * successful 2493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not in ASCII mode 2494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of input and no truncated input 2495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U_SUCCESS(*err) && 2497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isTargetByteDBCS && 2498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->flush && source>=sourceLimit && args->converter->fromUChar32==0 2499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex; 2501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we are switching to ASCII */ 
2503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isTargetByteDBCS=FALSE; 2504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the source index of the last input character */ 2506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO this would be simpler and more reliable if we used a pair 2508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of sourceIndex/prevSourceIndex like in ucnvmbcs.c 2509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so that we could simply use the prevSourceIndex here; 2510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this code gives an incorrect result for the rare case of an unmatched 2511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trail surrogate that is alone in the last buffer of the text stream 2512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=(int32_t)(source-args->source); 2514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceIndex>0) { 2515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 2516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U16_IS_TRAIL(args->source[sourceIndex]) && 2517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) 2518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 2520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=-1; 2523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
2524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 2526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter, 2527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SHIFT_IN_STR, 1, 2528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 2529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 2530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 2531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*save the state and return */ 2534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = source; 2535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = (char*)target; 2536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS; 2537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************************ To Unicode ***************************************/ 2540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args, 2543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 2544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char const* sourceStart; 2545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo); 
2546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUnicodeArgs subArgs; 2548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t minArgsSize; 2549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the subconverter arguments */ 2551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->size<sizeof(UConverterToUnicodeArgs)) { 2552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minArgsSize = args->size; 2553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); 2555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(&subArgs, args, minArgsSize); 2558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.size = (uint16_t)minArgsSize; 2559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.converter = myData->currentConverter; 2560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remember the original start of the input for offsets */ 2562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceStart = args->source; 2563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key != 0) { 2565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial escape sequence */ 2566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto escape; 2567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
2569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(U_SUCCESS(*err) && args->source < args->sourceLimit) { 2570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ 2571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.source = args->source; 2572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush); 2573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subArgs.source != subArgs.sourceLimit) { 2574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * get the current partial byte sequence 2576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it needs to be moved between the public and the subconverter 2578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so that the conversion framework, which only sees the public 2579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * converter, can handle truncated and illegal input etc. 
2580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->converter->toULength > 0) { 2582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength); 2583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.converter->toULength = args->converter->toULength; 2585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Convert up to the end of the input, or to before the next escape character. 2588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Does not handle conversion extensions because the preToU[] state etc. 2589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is not copied. 2590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); 2592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets != NULL && sourceStart != args->source) { 2594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* update offsets to base them on the actual start of the input */ 2595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets = args->offsets; 2596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target = args->target; 2597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t delta = (int32_t)(args->source - sourceStart); 2598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(target < subArgs.target) { 2599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*offsets >= 0) { 
2600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets += delta; 2601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++offsets; 2603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++target; 2604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = subArgs.source; 2607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = subArgs.target; 2608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets = subArgs.offsets; 2609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy input/error/overflow buffers */ 2611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subArgs.converter->toULength > 0) { 2612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength); 2613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = subArgs.converter->toULength; 2615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*err == U_BUFFER_OVERFLOW_ERROR) { 2617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subArgs.converter->UCharErrorBufferLength > 0) { 2618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, 2619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.converter->UCharErrorBufferLength); 2620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; 2622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.converter->UCharErrorBufferLength = 0; 2623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*err) || (args->source == args->sourceLimit)) { 2627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruescape: 2631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru changeState_2022(args->converter, 2632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &(args->source), 2633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->sourceLimit, 2634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022_KR, 2635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 2636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, 2641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 2642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char tempBuf[2]; 2643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySource = ( char *) args->source; 2644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *myTarget = args->target; 
2645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySourceLimit = args->sourceLimit; 2646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 targetUniChar = 0x0000; 2647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar mySourceChar = 0x0000; 2648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData; 2649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData* sharedData ; 2650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback; 2651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData=(UConverterDataISO2022*)(args->converter->extraInfo); 2653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->version==1){ 2654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err); 2655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize state */ 2659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sharedData = myData->currentConverter->sharedData; 2660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = args->converter->useFallback; 2661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key != 0) { 2663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial escape sequence */ 2664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto escape; 2665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { 
2666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial double-byte character */ 2667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar = args->converter->toUBytes[0]; 2668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 0; 2669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailByte; 2670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(mySource< mySourceLimit){ 2673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget < args->targetLimit){ 2675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar= (unsigned char) *mySource++; 2677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar==UCNV_SI){ 2679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->toU2022State.g = 0; 268085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (myData->isEmptySegment) { 268185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ 268285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *err = U_ILLEGAL_ESCAPE_SEQUENCE; 268385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUCallbackReason = UCNV_IRREGULAR; 268485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUBytes[0] = (uint8_t)mySourceChar; 268585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toULength = 1; 268685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->target = myTarget; 268785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->source = mySource; 
268885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 268985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 2690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*consume the source */ 2691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else if(mySourceChar==UCNV_SO){ 2693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->toU2022State.g = 1; 269485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ 2695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*consume the source */ 2696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else if(mySourceChar==ESC_2022){ 2698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySource--; 2699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruescape: 270085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */ 2701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru changeState_2022(args->converter,&(mySource), 2702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceLimit, ISO_2022_KR, err); 2703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 2704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 2705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 2706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
271185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */ 2712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->toU2022State.g == 1) { 2713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySource < mySourceLimit) { 271485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int leadIsOk, trailIsOk; 271585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint8_t trailByte; 2716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailByte: 271785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = missingCharMarker; 271885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailByte = (uint8_t)*mySource; 271985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 272085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ticket 5691: consistent illegal sequences: 272185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - We include at least the first byte in the illegal sequence. 272285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - If any of the non-initial bytes could be the start of a character, 272385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * we stop the illegal sequence before the first one of those. 272485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 272585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is 272685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * an ESC/SO/SI, we report only the first byte as the illegal sequence. 272785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Otherwise we convert or report the pair of bytes. 
272885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 272985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); 273085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); 273185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (leadIsOk && trailIsOk) { 273285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 273385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[0] = (char)(mySourceChar + 0x80); 273485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[1] = (char)(trailByte + 0x80); 2735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback); 273685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = (mySourceChar << 8) | trailByte; 273785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 273885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 273985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 274085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* add another bit so that the code below writes 2 bytes in case of error */ 274185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; 2742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toUBytes[0] = (uint8_t)mySourceChar; 2745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 1; 2746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
274985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho else if(mySourceChar <= 0x7f) { 2750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback); 275185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 275285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = 0xffff; 2753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetUniChar < 0xfffe){ 2755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets) { 2756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 2757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++)=(UChar)targetUniChar; 2759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 2761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Call the callback function*/ 2762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); 2763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 2767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 2768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 
2772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 2773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************************** END ISO2022-KR *********************************/ 2776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************************** ISO-2022-CN ********************************* 2778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Rules for ISO-2022-CN Encoding: 2780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* i) The designator sequence must appear once on a line before any instance 2781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* of character set it designates. 2782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ii) If two lines contain characters from the same character set, both lines 2783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* must include the designator sequence. 2784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* iii) Once the designator sequence is known, a shifting sequence has to be found 2785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* to invoke the shifting 2786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* iv) All lines start in ASCII and end in ASCII. 
* v)   Four shifting sequences are employed for this purpose:
*
*      Sequence    ASCII Eq    Charsets
*      ----------  --------    ---------
*      SI          <SI>        US-ASCII
*      SO          <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
*      SS2         <ESC>N      CNS-11643-1992 Plane 2
*      SS3         <ESC>O      CNS-11643-1992 Planes 3-7
*
* vi)
*      SOdesignator  : ESC "$" ")" finalchar_for_SO
*      SS2designator : ESC "$" "*" finalchar_for_SS2
*      SS3designator : ESC "$" "+" finalchar_for_SS3
*
*      ESC $ ) A       Indicates the bytes following SO are Chinese
*                      characters as defined in GB 2312-80, until
*                      another SOdesignation appears
*
*
*      ESC $ ) E       Indicates the bytes following SO are as defined
*                      in ISO-IR-165 (for details, see section 2.1),
*                      until another SOdesignation appears
*
*      ESC $ ) G       Indicates the bytes following SO are as defined
*                      in CNS 11643-plane-1, until another
*                      SOdesignation appears
*
*      ESC $ * H       Indicates the two bytes immediately following
*                      SS2 is a Chinese character as defined in CNS
*                      11643-plane-2, until another SS2designation
*                      appears
*                      (Meaning <ESC>N must precede every 2 byte
*                      sequence.)
*
*      ESC $ + I       Indicates the immediate two bytes following SS3
*                      is a Chinese character as defined in CNS
*                      11643-plane-3, until another SS3designation
*                      appears
*                      (Meaning <ESC>O must precede every 2 byte
*                      sequence.)
*
*      ESC $ + J       Indicates the immediate two bytes following SS3
*                      is a Chinese character as defined in CNS
*                      11643-plane-4, until another SS3designation
*                      appears
*                      (In English: <ESC>O must precede every 2 byte
*                      sequence.)
*
*      ESC $ + K       Indicates the immediate two bytes following SS3
*                      is a Chinese character as defined in CNS
*                      11643-plane-5, until another SS3designation
*                      appears
*
*      ESC $ + L       Indicates the immediate two bytes following SS3
*                      is a Chinese character as defined in CNS
*                      11643-plane-6, until another SS3designation
*                      appears
*
*      ESC $ + M       Indicates the immediate two bytes following SS3
*                      is a Chinese character as defined in CNS
*                      11643-plane-7, until another SS3designation
2848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appears 2849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and 2851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* has its own designation information before any Chinese characters 2852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appear 2853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 2855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The following are defined this way to make the strings truely readonly */ 2857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; 2858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; 2859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; 2860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; 2861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; 2862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; 2863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; 2864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; 2865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; 
2866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/********************** ISO2022-CN Data **************************/ 2868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* const escSeqCharsCN[10] ={ 2869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SHIFT_IN_STR, /* ASCII */ 2870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GB_2312_80_STR, 2871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_IR_165_STR, 2872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_1_STR, 2873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_2_STR, 2874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_3_STR, 2875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_4_STR, 2876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_5_STR, 2877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_6_STR, 2878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_7_STR 2879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 2880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 2884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *converterData; 2885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pFromU2022State; 2886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target = (uint8_t *) args->target; 
2887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; 2888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* source = args->source; 2889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* sourceLimit = args->sourceLimit; 2890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t* offsets = args->offsets; 2891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 sourceChar; 2892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buffer[8]; 2893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len; 2894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t choices[3]; 2895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t choiceCount; 2896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetValue = 0; 2897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback; 2898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the state */ 2900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData = (UConverterDataISO2022*)cnv->extraInfo; 2901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State = &converterData->fromU2022State; 2902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 2904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check if the last codepoint of previous buffer was a lead surrogate*/ 2906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { 2907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 2908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
} 2909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while( source < sourceLimit){ 2911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < targetLimit){ 2912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar = *(source++); 2914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*check if the char is a First surrogate*/ 2915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE(sourceChar)) { 2916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(sourceChar)) { 2917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 2918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*look ahead to find the trail surrogate*/ 2919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source < sourceLimit) { 2920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 2921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=(UChar) *source; 2922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SECOND_SURROGATE(trail)) { 2923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source++; 2924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail); 2925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=0x00; 2926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this supplementary code point */ 2927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 2928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 2930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 
callback(illegal) */ 2931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 2932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 2933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 2937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 2938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 2942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 2944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 2945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do the conversion */ 2950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar <= 0x007f ){ 2951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not convert SO/SI/ESC */ 2952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_2022_CONTROL(sourceChar)) { 2953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 
2955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 2956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* US-ASCII */ 2960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g == 0) { 2961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[0] = (char)sourceChar; 2962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 2963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[0] = UCNV_SI; 2965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[1] = (char)sourceChar; 2966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 2; 2967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 2968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 2969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar == CR || sourceChar == LF) { 2971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset the state at the end of a line */ 2972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(pFromU2022State, 0, sizeof(ISO2022State)); 2973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 2974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 2977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert U+0080..U+10ffff */ 2978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 
2979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs, g; 2980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choiceCount == 0) { 2982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try the current SO/G1 converter first */ 2983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[0] = pFromU2022State->cs[1]; 2984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* default to GB2312_1 if none is designated yet */ 2986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choices[0] == 0) { 2987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[0] = GB2312_1; 2988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(converterData->version == 0) { 2991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-CN */ 2992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */ 2994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choices[0] == GB2312_1) { 2995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)CNS_11643_1; 2996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)GB2312_1; 2998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 2; 300127f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (converterData->version == 1) { 
3002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-CN-EXT */ 3003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try one of the other converters */ 3005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(choices[0]) { 3006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case GB2312_1: 3007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)CNS_11643_1; 3008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[2] = (int8_t)ISO_IR_165; 3009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_IR_165: 3011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)GB2312_1; 3012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[2] = (int8_t)CNS_11643_1; 3013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: /* CNS_11643_x */ 3015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)GB2312_1; 3016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[2] = (int8_t)ISO_IR_165; 3017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 3; 302127f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 302227f654740f2a26ad62a5c155af9199af9e69b889claireho choices[0] = (int8_t)CNS_11643_1; 302327f654740f2a26ad62a5c155af9199af9e69b889claireho choices[1] = (int8_t)GB2312_1; 3024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
3027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = g = 0; 3028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len==0: no mapping found yet 3030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks 3031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len>0: found a roundtrip result, done 3032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 0; 3034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We will turn off useFallback after finding a fallback, 3036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but we still get fallbacks from PUA code points as usual. 3037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Therefore, we will also need to check that we don't overwrite 3038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * an early fallback with a later one. 
3039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = cnv->useFallback; 3041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < choiceCount && len <= 0; ++i) { 3043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs0 = choices[i]; 3044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs0 > 0) { 3045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 3046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len2; 304785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(cs0 >= CNS_11643_0) { 3048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len2 = MBCS_FROM_UCHAR32_ISO2022( 3049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[CNS_11643], 3050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, 3051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 3052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, 3053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MBCS_OUTPUT_3); 3054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 == 3 || (len2 == -3 && len == 0)) { 3055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = value; 3056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80); 3057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 >= 0) { 3058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 2; 3059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = -2; 3061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 3062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
3063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs == CNS_11643_1) { 3064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 1; 3065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(cs == CNS_11643_2) { 3066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 2; 3067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* plane 3..7 */ if(converterData->version == 1) { 3068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 3; 3069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-CN (without -EXT) does not support plane 3..7 */ 3071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 0; 3072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* GB2312_1 or ISO-IR-165 */ 3076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len2 = MBCS_FROM_UCHAR32_ISO2022( 3077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[cs0], 3078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, 3079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 3080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, 3081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MBCS_OUTPUT_2); 3082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 == 2 || (len2 == -2 && len == 0)) { 3083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = value; 3084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = len2; 3085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 3086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 1; 
3087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 3088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len != 0) { 3094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 0; /* count output bytes; it must have been abs(len) == 2 */ 3095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the designation sequence if necessary */ 3097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs != pFromU2022State->cs[g]) { 3098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs < CNS_11643) { 3099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer, escSeqCharsCN[cs], 4); 3100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4); 3102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 4; 3104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[g] = cs; 3105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(g == 1) { 3106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* changing the SO/G1 charset invalidates the choices[] */ 3107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 3108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
3111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the shift sequence if necessary */ 3112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(g != pFromU2022State->g) { 3113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(g) { 3114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 3115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = UCNV_SO; 3116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */ 3118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 1; 3119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 3121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 0x1b; 3122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 0x4e; 3123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: /* case 3 */ 3125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 0x1b; 3126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 0x4f; 3127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the two output bytes */ 3132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = (char)(targetValue >> 8); 3133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = (char)targetValue; 3134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 
3135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* if we cannot find the character after checking all codepages 3136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * then this is an error 3137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 3139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 3140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output len>0 bytes in buffer[] */ 3145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len == 1) { 3146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[0]; 3147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) { 3148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */ 3149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(len == 2 && (target + 2) <= targetLimit) { 3151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[0]; 3152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[1]; 3153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) { 3154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)); 3155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = sourceIndex; 3156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = sourceIndex; 
3157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 3160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 3161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, len, 3162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 3163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)), 3164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 3165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)) { 3166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* end if(myTargetIndex<myTargetLength) */ 3170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 3171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 3172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }/* end while(mySourceIndex<mySourceLength) */ 3176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the end of the input stream and detection of truncated input 3179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are handled by the framework, but for ISO-2022-CN conversion 3180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to be in ASCII mode 
at the very end 3181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 3182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conditions: 3183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * successful 3184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not in ASCII mode 3185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of input and no truncated input 3186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U_SUCCESS(*err) && 3188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g!=0 && 3189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->flush && source>=sourceLimit && cnv->fromUChar32==0 3190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 3191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex; 3192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we are switching to ASCII */ 3194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g=0; 3195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the source index of the last input character */ 3197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO this would be simpler and more reliable if we used a pair 3199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of sourceIndex/prevSourceIndex like in ucnvmbcs.c 3200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so that we could simply use the prevSourceIndex here; 3201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this code gives an incorrect result for the rare case of an unmatched 3202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trail surrogate 
that is alone in the last buffer of the text stream 3203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=(int32_t)(source-args->source); 3205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceIndex>0) { 3206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 3207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U16_IS_TRAIL(args->source[sourceIndex]) && 3208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) 3209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 3210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 3211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=-1; 3214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 3217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 3218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SHIFT_IN_STR, 1, 3219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 3220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 3221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 3222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*save the state and return */ 3225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = source; 3226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = (char*)target; 
3227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 3231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, 3232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 3233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char tempBuf[3]; 3234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySource = (char *) args->source; 3235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *myTarget = args->target; 3236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySourceLimit = args->sourceLimit; 3237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetUniChar = 0x0000; 3238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t mySourceChar = 0x0000; 3239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData; 3240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pToU2022State; 3241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData=(UConverterDataISO2022*)(args->converter->extraInfo); 3243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State = &myData->toU2022State; 3244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key != 0) { 3246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial escape sequence */ 3247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto escape; 3248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(args->converter->toULength == 1 && mySource < 
mySourceLimit && myTarget < args->targetLimit) { 3249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial double-byte character */ 3250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar = args->converter->toUBytes[0]; 3251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 0; 325285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = missingCharMarker; 3253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailByte; 3254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(mySource < mySourceLimit){ 3257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar =missingCharMarker; 3259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget < args->targetLimit){ 3261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar= (unsigned char) *mySource++; 3263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(mySourceChar){ 3265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UCNV_SI: 3266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=0; 326785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (myData->isEmptySegment) { 326885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ 326985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *err = U_ILLEGAL_ESCAPE_SEQUENCE; 327085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUCallbackReason = UCNV_IRREGULAR; 
327185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUBytes[0] = mySourceChar; 327285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toULength = 1; 327385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->target = myTarget; 327485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->source = mySource; 327585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 327685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 3277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 3278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UCNV_SO: 3280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pToU2022State->cs[1] != 0) { 3281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=1; 328285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ 3283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 3284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* illegal to have SO before a matching designator */ 328685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */ 3287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ESC_2022: 3291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySource--; 3292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruescape: 329385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho { 329485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char * mySourceBefore = mySource; 
329585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t toULengthBefore = args->converter->toULength; 329685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 329785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho changeState_2022(args->converter,&(mySource), 329885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceLimit, ISO_2022_CN,err); 329985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 330085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* After SO there must be at least one character before a designator (designator error handled separately) */ 330185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { 330285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *err = U_ILLEGAL_ESCAPE_SEQUENCE; 330385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUCallbackReason = UCNV_IRREGULAR; 330450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); 330585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 330685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 3307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* invalid or illegal escape sequence */ 3309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 3310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 3311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 331285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ 3313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 3314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 3316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
3317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ 3318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case CR: 3320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*falls through*/ 3321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case LF: 3322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(pToU2022State, 0, sizeof(ISO2022State)); 3323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* falls through */ 3324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 3325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert one or two bytes */ 332685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; 3327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pToU2022State->g != 0) { 3328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySource < mySourceLimit) { 3329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData *cnv; 3330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru StateEnum tempState; 3331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t tempBufLen; 333285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int leadIsOk, trailIsOk; 333385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint8_t trailByte; 3334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailByte: 333585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailByte = (uint8_t)*mySource; 333685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 333785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ticket 5691: consistent illegal sequences: 333885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - We include at least the first byte in the illegal sequence. 
333985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - If any of the non-initial bytes could be the start of a character, 334085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * we stop the illegal sequence before the first one of those. 334185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 334285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is 334385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * an ESC/SO/SI, we report only the first byte as the illegal sequence. 334485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Otherwise we convert or report the pair of bytes. 334585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 334685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); 334785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); 334885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (leadIsOk && trailIsOk) { 334985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 335085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempState = (StateEnum)pToU2022State->cs[pToU2022State->g]; 335185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(tempState >= CNS_11643_0) { 335285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv = myData->myConverterArray[CNS_11643]; 335385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); 335485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[1] = (char) (mySourceChar); 335585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[2] = (char) trailByte; 335685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBufLen = 3; 335785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 335885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho }else{ 335985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv = myData->myConverterArray[tempState]; 336085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 
tempBuf[0] = (char) (mySourceChar); 336185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[1] = (char) trailByte; 336285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBufLen = 2; 336385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 336485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); 336585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = (mySourceChar << 8) | trailByte; 336685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 336785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 336885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 336985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* add another bit so that the code below writes 2 bytes in case of error */ 337085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; 3371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pToU2022State->g>=2) { 3373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return from a single-shift state to the previous one */ 3374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=pToU2022State->prevG; 3375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toUBytes[0] = (uint8_t)mySourceChar; 3378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 1; 3379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 3380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
3382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 3383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 3384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = (UChar) mySourceChar; 3385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ 3390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 3391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 3392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++)=(UChar)targetUniChar; 3394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if(targetUniChar > missingCharMarker){ 3396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* disassemble the surrogate pair and write to output*/ 3397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar-=0x0010000; 3398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10)); 3399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 3400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 
1 : 2)); 3401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++myTarget; 3403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget< args->targetLimit){ 3404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); 3405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 3406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 3407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++myTarget; 3409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 3410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= 3411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); 3412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 3416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Call the callback function*/ 3417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); 3418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 3422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 3423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
break; 3424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 3427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 3428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 3429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 3432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { 3433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 3434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; 3435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pFromU2022State=&myConverterData->fromU2022State; 3436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *p, *subchar; 3437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buffer[8]; 3438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 3439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subchar=(char *)cnv->subChars; 3441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=cnv->subCharLen; /* assume length==1 for most variants */ 3442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p = buffer; 3444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(myConverterData->locale[0]){ 3445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'j': 3446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru { 3447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs; 3448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g == 1) { 3450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS7: switch from G1 to G0 */ 3451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 3452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = UCNV_SI; 3453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = pFromU2022State->cs[0]; 3456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs != ASCII && cs != JISX201) { 3457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not in ASCII or JIS X 0201: switch to ASCII */ 3458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[0] = (int8_t)ASCII; 3459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = '\x1b'; 3460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = '\x28'; 3461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = '\x42'; 3462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[0]; 3465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'c': 3468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g != 0) { 3469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not in ASCII mode: switch to ASCII */ 3470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 
3471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = UCNV_SI; 3472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[0]; 3474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'k': 3476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->version == 0) { 3477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length == 1) { 3478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((UBool)args->converter->fromUnicodeStatus) { 3479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* in DBCS mode: switch to SBCS */ 3480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUnicodeStatus = 0; 3481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = UCNV_SI; 3482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[0]; 3484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* length == 2*/ { 3485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!(UBool)args->converter->fromUnicodeStatus) { 3486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* in SBCS mode: switch to DBCS */ 3487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUnicodeStatus = 1; 3488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = UCNV_SO; 3489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[0]; 3491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[1]; 3492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
else { 3495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save the subconverter's substitution string */ 3496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *currentSubChars = myConverterData->currentConverter->subChars; 3497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen; 3498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set our substitution string into the subconverter */ 3500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->subChars = (uint8_t *)subchar; 3501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->subCharLen = (int8_t)length; 3502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* let the subconverter write the subchar, set/retrieve fromUChar32 state */ 3504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter = myConverterData->currentConverter; 3505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32; 3506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_cbFromUWriteSub(args, 0, err); 3507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32; 3508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter = cnv; 3509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* restore the subconverter's substitution string */ 3511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->subChars = currentSubChars; 3512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
myConverterData->currentConverter->subCharLen = currentSubCharLen; 3513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*err == U_BUFFER_OVERFLOW_ERROR) { 3515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->currentConverter->charErrorBufferLength > 0) { 3516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy( 3517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer, 3518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBuffer, 3519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBufferLength); 3520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength; 3522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBufferLength = 0; 3523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 3525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 3527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not expected */ 3528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_cbFromUWriteBytes(args, 3531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, (int32_t)(p - buffer), 3532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsetIndex, err); 3533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
3535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 3536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Structure for cloning an ISO 2022 converter into a single memory block. 3537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ucnv_safeClone() of the converter will align the entire cloneStruct, 3538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and then ucnv_safeClone() of the sub-converter may additionally align 3539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * currentConverter inside the cloneStruct, for which we need the deadSpace 3540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * after currentConverter. 3541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is because UAlignedMemory may be larger than the actually 3542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * necessary alignment size for the platform. 3543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The other cloneStruct fields will not be moved around, 3544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and are aligned properly with cloneStruct's alignment. 
3545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct cloneStruct 3547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 3548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter cnv; 3549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter currentConverter; 3550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UAlignedMemory deadSpace; 3551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 mydata; 3552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UConverter * 3556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_SafeClone( 3557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UConverter *cnv, 3558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void *stackBuffer, 3559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *pBufferSize, 3560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *status) 3561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 3562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct cloneStruct * localClone; 3563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *cnvData; 3564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, size; 3565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ 3567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pBufferSize = (int32_t)sizeof(struct cloneStruct); 3568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 
3569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData = (UConverterDataISO2022 *)cnv->extraInfo; 3572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone = (struct cloneStruct *)stackBuffer; 3573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ 3575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022)); 3577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */ 3578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone->cnv.isExtraLocal = TRUE; 3579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* share the subconverters */ 3581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnvData->currentConverter != NULL) { 3583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ 3584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone->mydata.currentConverter = 3585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_safeClone(cnvData->currentConverter, 3586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &localClone->currentConverter, 3587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &size, status); 3588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*status)) { 3589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
return NULL; 3590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) { 3594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnvData->myConverterArray[i] != NULL) { 3595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_incrementRefCount(cnvData->myConverterArray[i]); 3596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return &localClone->cnv; 3600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 3603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_GetUnicodeSet(const UConverter *cnv, 3604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 3605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterUnicodeSet which, 3606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) 3607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 3608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 3609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* cnvData; 3610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*pErrorCode)) { 3612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 3613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef 
U_ENABLE_GENERIC_ISO_2022 3615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cnv->sharedData == &_ISO2022Data) { 3616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* We use UTF-8 in this case */ 3617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0, 0xd7FF); 3618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0xE000, 0x10FFFF); 3619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 3620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 3622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData = (UConverterDataISO2022*)cnv->extraInfo; 3624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* open a set and initialize it with code points that are algorithmically round-tripped */ 3626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(cnvData->locale[0]){ 3627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'j': 3628c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* include JIS X 0201 which is hardcoded */ 3629c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru sa->add(sa->set, 0xa5); 3630c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru sa->add(sa->set, 0x203e); 3631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 3632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* include Latin-1 for some variants of JP */ 3633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0, 0xff); 3634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* include ASCII for JP */ 
3636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0, 0x7f); 3637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3638c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { 3639c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 3640c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 3641c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) 3642c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * use half-width Katakana. 3643c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) 3644c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * half-width Katakana via the ESC ( I sequence. 3645c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * However, we only emit (fromUnicode) half-width Katakana according to the 3646c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * definition of each variant. 3647c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * 3648c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * When including fallbacks, 3649c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * we need to include half-width Katakana Unicode code points for all JP variants because 3650c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). 
3651c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 3652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* include half-width Katakana for JP */ 3653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, HWKANA_START, HWKANA_END); 3654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'c': 3657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'z': 3658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* include ASCII for CN */ 3659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0, 0x7f); 3660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'k': 3662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is only one converter for KR, and it is not in the myConverterArray[] */ 3663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData->currentConverter->sharedData->impl->getUnicodeSet( 3664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData->currentConverter, sa, which, pErrorCode); 3665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the loop over myConverterArray[] will simply not find another converter */ 3666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 3668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3671c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. 
*/ 3672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData->version==0 && i==CNS_11643 3674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 3675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */ 3676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_MBCSGetUnicodeSetForBytes( 3677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData->myConverterArray[i], 3678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa, UCNV_ROUNDTRIP_SET, 3679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0x81, 0x82, 3680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 3681c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 3682c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru#endif 3683c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 3684c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 3685c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UConverterSetFilter filter; 3686c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(cnvData->myConverterArray[i]!=NULL) { 3687c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3688c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru cnvData->version==0 && i==CNS_11643 3689c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ) { 3690c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 3691c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Version-specific for CN: 3692c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * CN version 0 does not map CNS planes 3..7 although 
3693c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * they are all available in the CNS conversion table; 3694c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * CN version 1 (-EXT) does map them all. 3695c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * The two versions create different Unicode sets. 3696c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 3697c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter=UCNV_SET_FILTER_2022_CN; 3698c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if(cnvData->locale[0]=='j' && i==JISX208) { 3699c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 3700c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Only add code points that map to Shift-JIS codes 3701c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * corresponding to JIS X 0208. 3702c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 3703c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter=UCNV_SET_FILTER_SJIS; 3704c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if(i==KSC5601) { 3705c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 3706c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables) 3707c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * are broader than GR94. 
3708c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 3709c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter=UCNV_SET_FILTER_GR94DBCS; 3710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3711c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter=UCNV_SET_FILTER_NONE; 3712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3713c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode); 3714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ISO 2022 converters must not convert SO/SI/ESC despite what 3719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sub-converters do by themselves. 3720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove these characters from the set. 
3721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->remove(sa->set, 0x0e); 3723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->remove(sa->set, 0x0f); 3724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->remove(sa->set, 0x1b); 3725c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 3726c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* ISO 2022 converters do not convert C1 controls either */ 3727c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru sa->removeRange(sa->set, 0x80, 0x9f); 3728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ISO2022Impl={ 3731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Open, 3737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close, 3738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Reset, 3739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 3741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, 3742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, 3743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode_UTF8, 3744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, 
3745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 3746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 3751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022getName, 3755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_WriteSub, 3756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_SafeClone, 3757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_GetUnicodeSet 3758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ISO2022StaticData={ 3760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 3761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "ISO_2022", 3762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2022, 3763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, 3764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ 3767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 3768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru FALSE, 3771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _ISO2022Data={ 3776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), 3777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~((uint32_t) 0), 3778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022StaticData, 3781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022Impl, 3783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 3784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************JP****************/ 3787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ISO2022JPImpl={ 3788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Open, 3794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close, 3795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Reset, 
3796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, 3798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, 3799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, 3800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, 3801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022getName, 3805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_WriteSub, 3806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_SafeClone, 3807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_GetUnicodeSet 3808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ISO2022JPStaticData={ 3810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 3811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "ISO_2022_JP", 3812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, 3814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */ 3817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 3818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 
3819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterSharedData _ISO2022JPData={ 3826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), 3827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~((uint32_t) 0), 3828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022JPStaticData, 3831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022JPImpl, 3833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 3834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************* KR ***************/ 3837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ISO2022KRImpl={ 3838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Open, 3844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
_ISO2022Close, 3845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Reset, 3846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, 3848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, 3849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, 3850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, 3851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022getName, 3855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_WriteSub, 3856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_SafeClone, 3857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_GetUnicodeSet 3858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ISO2022KRStaticData={ 3860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 3861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "ISO_2022_KR", 3862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, 3864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* max 3 bytes per UChar: SO+DBCS */ 3867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 
3868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterSharedData _ISO2022KRData={ 3876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), 3877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~((uint32_t) 0), 3878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022KRStaticData, 3881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022KRImpl, 3883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 3884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** CN ***************/ 3887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ISO2022CNImpl={ 3888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
3894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Open, 3895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close, 3896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Reset, 3897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, 3899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, 3900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, 3901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, 3902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022getName, 3906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_WriteSub, 3907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_SafeClone, 3908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_GetUnicodeSet 3909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ISO2022CNStaticData={ 3911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 3912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "ISO_2022_CN", 3913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, 3915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8, /* max 8 bytes 
per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */ 3918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 3919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterSharedData _ISO2022CNData={ 3927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), 3928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~((uint32_t) 0), 3929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022CNStaticData, 3932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022CNImpl, 3934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 3935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 3940