/*
**********************************************************************
*   Copyright (C) 2000-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnv2022.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000feb03
*   created by: Markus W. Scherer
*
*   Change history:
*
*   06/29/2000  helena  Major rewrite of the callback APIs.
*   08/08/2000  Ram     Included support for ISO-2022-JP-2
*                       Changed implementation of toUnicode
*                       function
*   08/21/2000  Ram     Added support for ISO-2022-KR
*   08/29/2000  Ram     Separated implementation of EBCDIC to
*                       ucnvebdc.c
*   09/20/2000  Ram     Added support for ISO-2022-CN
*                       Added implementations for getNextUChar()
*                       for specific 2022 country variants.
*   10/31/2000  Ram     Implemented offsets logic functions
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

#include "unicode/ucnv.h"
#include "unicode/uset.h"
#include "unicode/ucnv_err.h"
#include "unicode/ucnv_cb.h"
#include "unicode/utf16.h"
#include "ucnv_imp.h"
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnvmbcs.h" 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 44103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uassert.h" 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * I am disabling the generic ISO-2022 converter after proposing to do so on 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the icu mailing list two days ago. 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Reasons: 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * its designation sequences, single shifts with return to the previous state, 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * switch-with-no-return to UTF-16BE or similar, etc. 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is unlike the language-specific variants like ISO-2022-JP which 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * require a much smaller repertoire of ISO-2022 features. 
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * These variants continue to be supported. 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. I believe that no one is really using the generic ISO-2022 converter 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but rather always one of the language-specific variants. 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note that ICU's generic ISO-2022 converter has always output one escape 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sequence followed by UTF-8 for the whole stream. 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 3. Switching between subcharsets is extremely slow, because each time 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the previous converter is closed and a new one opened, 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * without any kind of caching, least-recently-used list, etc. 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4. The code is currently buggy, and given the above it does not seem 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reasonable to spend the time on maintenance. 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings. 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This means, for example, that when ISO-8859-7 is designated, the following 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff. 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The ICU ISO-2022 converter does not handle this - and has no information 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * about which subconverter would have to be shifted vs. which is designed 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for 7-bit ISO-2022. 
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Markus Scherer 2003-dec-03 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char SHIFT_IN_STR[] = "\x0F"; 8154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius// static const char SHIFT_OUT_STR[] = "\x0E"; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CR 0x0D 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LF 0x0A 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define H_TAB 0x09 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define V_TAB 0x0B 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SPACE 0x20 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA_START=0xff61, 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA_END=0xff9f 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 94-character sets with native byte values A1..FE are encoded in ISO 2022 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * as bytes 21..7E. (Subtract 0x80.) 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 96-character sets with native byte values A0..FF are encoded in ISO 2022 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * as bytes 20..7F. (Subtract 0x80.) 
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Do not encode C1 control codes with native bytes 80..9F 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * as bytes 00..1F (C0 control codes). 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GR94_START=0xa1, 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GR94_END=0xfe, 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GR96_START=0xa0, 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GR96_END=0xff 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ISO 2022 control codes must not be converted from Unicode 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * because they would mess up the byte stream. 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * corresponding to SO, SI, and ESC. 
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0) 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* for ISO-2022-JP and -CN implementations */ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum { 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* shared values */ 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru INVALID_STATE=-1, 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ASCII = 0, 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SS2_STATE=0x10, 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SS3_STATE, 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JP */ 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO8859_1 = 1 , 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO8859_7 = 2 , 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX201 = 3, 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX208 = 4, 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX212 = 5, 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GB2312 =6, 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru KSC5601 =7, 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */ 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* CN */ 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the first few enum constants must keep their values because they correspond to myConverterArray[] */ 
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GB2312_1=1, 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_IR_165=2, 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643=3, 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * these are used in StateEnum and ISO2022State variables, 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but CNS_11643 must be used to index into myConverterArray[] 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_0=0x20, 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1, 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_2, 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_3, 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_4, 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_5, 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_6, 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_7 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} StateEnum; 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* is the StateEnum charset value for a DBCS charset? 
*/ 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CSM(cs) ((uint16_t)1<<(cs)) 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note: The converter uses some leniency: 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * all versions, not just JIS7 and JIS8. 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - ICU does not distinguish between different versions of JIS X 0208. 
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 17085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoenum { MAX_JA_VERSION=4 }; 17185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum { 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ASCII1=0, 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru LATIN1, 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SBCS, 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru DBCS, 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MBCS, 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}Cnv2022Type; 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct ISO2022State { 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs[4]; /* charset number for SI 
(G0)/SO (G1)/SS2 (G2)/SS3 (G3) */ 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */ 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t prevG; /* g before single shift (SS2 or SS3) */ 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} ISO2022State; 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_OPTIONS_VERSION_MASK 0xf 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_2022_MAX_CONVERTERS 10 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct{ 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *currentConverter; 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Cnv2022Type currentType; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State toU2022State, fromU2022State; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t key; 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t version; 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isFirstBuffer; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 20785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool isEmptySegment; 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char name[30]; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char locale[3]; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}UConverterDataISO2022; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Protos */ 
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* ISO-2022 ----------------------------------------------------------------- */ 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*Forward declaration */ 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args, 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode * err); 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args, 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode * err); 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define ESC_2022 0x1B /*ESC*/ 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/ 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/ 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/ 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} UCNV_TableStates_2022; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
/*
* The way these state transition arrays work is:
* ex : ESC$B is the sequence for JISX208
*      a) First Iteration: char is ESC
*          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
*             int x = normalize_esq_chars_2022[27] which is equal to 1
*         ii) Search for this value in escSeqStateTable_Key_2022[]
*             value of x is stored at escSeqStateTable_Key_2022[0]
*        iii) Save this index as offset
*         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
*             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
*      b) Switch on this state and continue to next char
*          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
*             which is normalize_esq_chars_2022[36] == 4
*         ii) x is currently 1(from above)
*               x<<=5 -- x is now 32
*               x+=normalize_esq_chars_2022[36]
*               now x is 36
*        iii) Search for this value in escSeqStateTable_Key_2022[]
*             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
*         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
*             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
*      c) Switch on this state and continue to next char
*          i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
*             which is normalize_esq_chars_2022[66] == 9
*         ii) x is currently 36 (from above)
*               x<<=5 -- x is now 1152
*               x+=normalize_esq_chars_2022[66]
*               now x is 1161
*        iii) Search for this value in escSeqStateTable_Key_2022[]
*             value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
*         iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
*             escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
*          v) Get the converter name from escSeqStateTable_Result_2022[24] which is "JISX-208"
*/

269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*Below are the 3 arrays depicting a state transition table*/ 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t normalize_esq_chars_2022[256] = { 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,0 ,0 ,0 ,0 ,0 ,0 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * When the generic ISO-2022 converter is completely removed, not just disabled 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * per #ifdef, then the following state table and the associated tables that are 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * dimensioned with MAX_STATES_2022 should be trimmed. 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the associated escape sequences starting with ESC ( B should be removed. 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This includes the ones with key values 1097 and all of the ones above 1000000. 
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For the latter, the tables can simply be truncated. 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * For the former, since the tables must be kept parallel, it is probably best 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to simply duplicate an adjacent table cell, parallel in all tables. 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * It may make sense to restructure the tables, especially by using small search 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * tables for the variants instead of indexing them parallel to the table here. 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAX_STATES_2022 74 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = { 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644 
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,35947631 ,35947635 ,35947636 ,35947638 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = { 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 0 1 2 3 4 5 6 7 8 9 */ 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1" 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8" 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383" 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165" 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" 
,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1" 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = { 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Type def for refactoring changeState_2022 code*/ 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef enum{ 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022=0, 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022_JP=1, 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022_KR=2, 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022_CN=3 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} Variant2022; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*********** ISO 2022 Converter Protos ***********/ 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 37685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close(UConverter *converter); 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022Reset(UConverter *converter, UConverterResetChoice choice); 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022getName(const UConverter* cnv); 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err); 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UConverter * 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status); 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
QueruT_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err); 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 398103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusnamespace { 399103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*const UConverterSharedData _ISO2022Data;*/ 401103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusextern const UConverterSharedData _ISO2022JPData; 402103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusextern const UConverterSharedData _ISO2022KRData; 403103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusextern const UConverterSharedData _ISO2022CNData; 404103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 405103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} // namespace 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** Converter implementations ******************/ 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The purpose of this function is to get around gcc compiler warnings. 
*/ 410103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline void 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerufromUWriteUInt8(UConverter *cnv, 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *bytes, int32_t length, 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t **target, const char *targetLimit, 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex, 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *targetChars = (char *)*target; 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit, 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, sourceIndex, pErrorCode); 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target = (uint8_t*)targetChars; 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 425103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline void 426103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliussetInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){ 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->version == 1) { 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = myConverterData->currentConverter; 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0; /* offset */ 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=0; /* state */ 
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; /* byteIndex */ 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 436103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline void 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusetInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){ 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* in ISO-2022-KR the designator sequence appears only once 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in a file so we append it only once 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( converter->charErrorBufferLength==0){ 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBufferLength = 4; 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[0] = 0x1b; 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[1] = 0x24; 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[2] = 0x29; 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[3] = 0x43; 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->version == 1) { 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = myConverterData->currentConverter; 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=0; 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=1; /* prevLength */ 
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 45885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char myLocale[6]={' ',' ',' ',' ',' ',' '}; 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022)); 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->extraInfo != NULL) { 46485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterNamePieces stackPieces; 465103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t version; 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; 47085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022)); 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentType = ASCII1; 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus =FALSE; 47485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->locale){ 47585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)); 
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 47785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho version = pArgs->options & UCNV_OPTIONS_VERSION_MASK; 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->version = version; 4798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius /* Begin Google-specific change. */ 480b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru /* The "jk" locale ID was made up for KDDI ISO-2022-JP. */ 481b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru /* The "js" locale ID was made up for SoftBank ISO-2022-JP. */ 482b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru if((myLocale[0]=='j' && 483b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru (myLocale[1]=='a'|| myLocale[1]=='p' || myLocale[1]=='k' || 484b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru myLocale[1]=='s') && 485b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru (myLocale[2]=='_' || myLocale[2]=='\0'))) 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru size_t len=0; 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* open the required converters and cache them */ 48985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(version>MAX_JA_VERSION) { 49085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* prevent indexing beyond jpCharsetMasks[] */ 49185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->version = version = 0; 49285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 49485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[ISO8859_7] = 49585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
497b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru if (myLocale[1]=='k') { /* Use KDDI's version. */ 49859d709d503bab6e2b61931737e662dd293b40578ccornelius myConverterData->myConverterArray[JISX208] = 49985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("kddi-jisx-208-2007", &stackPieces, &stackArgs, errorCode); 500b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru } else if (myLocale[1]=='s') { /* Use SoftBank's version. */ 50159d709d503bab6e2b61931737e662dd293b40578ccornelius myConverterData->myConverterArray[JISX208] = 50285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("softbank-jisx-208-2007", &stackPieces, &stackArgs, errorCode); 503b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru } else { 5048393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius /* 5058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * Change for http://b/issue?id=937017 : 5068393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * Restore JIS X 0208 ISO-2022-JP mappings from before 5078393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * sharing the table with the Shift-JIS converter 5088393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * (CL 5963009 and http://bugs.icu-project.org/trac/ticket/5797). 5098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * TODO(mscherer): Create and use a new, unified Google Shift-JIS 5108393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * table for both Shift-JIS and ISO-2022-JP. 5118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius */ 5128393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius myConverterData->myConverterArray[JISX208] = 5138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ucnv_loadSharedData("jisx-208", &stackPieces, &stackArgs, errorCode); 514b8157e19529c589d80c90045c5f363116d2300fbJean-Baptiste Queru } 5158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius /* End Google-specific change. 
*/ 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[version]&CSM(JISX212)) { 51785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[JISX212] = 51885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[version]&CSM(GB2312)) { 52185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[GB2312] = 522ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* BEGIN android-changed */ 52327f654740f2a26ad62a5c155af9199af9e69b889claireho ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ 524ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* END android-changed */ 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[version]&CSM(KSC5601)) { 52785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[KSC5601] = 52885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the function pointers to appropriate funtions */ 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(myConverterData->locale,"ja"); 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version="); 
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = uprv_strlen(myConverterData->name); 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->name[len+1]='\0'; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (myLocale[2]=='_' || myLocale[2]=='\0')) 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 54385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char *cnvName; 54485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(version==1) { 54585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnvName="icu-internal-25546"; 54685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 547ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* BEGIN android-changed */ 548e53684e98c4bb65676a4b8b81a50ce4a0304aedcClaire Ho cnvName="ksc_5601"; 549ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* END android-changed */ 55085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->version=version=0; 55185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 55285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->onlyTestIsLoadable) { 55385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carries result */ 55485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(cnv->extraInfo); 55585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->extraInfo=NULL; 55685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 55785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 55885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->currentConverter=ucnv_open(cnvName, errorCode); 
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*errorCode)) { 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close(cnv); 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(version==1) { 56585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1"); 56685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4); 56785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->subCharLen = myConverterData->currentConverter->subCharLen; 56885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho }else{ 56985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0"); 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* initialize the state variables */ 57385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setInitialStateToUnicodeKR(cnv, myConverterData); 57485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setInitialStateFromUnicodeKR(cnv, myConverterData); 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set the function pointers to appropriate funtions */ 57785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; 57885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_strcpy(myConverterData->locale,"ko"); 57985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 
if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&& 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (myLocale[2]=='_' || myLocale[2]=='\0')) 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* open the required converters and cache them */ 586ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* BEGIN android-changed */ 58785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[GB2312_1] = 588e53684e98c4bb65676a4b8b81a50ce4a0304aedcClaire Ho ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(version==1) { 59085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[ISO_IR_165] = 591e53684e98c4bb65676a4b8b81a50ce4a0304aedcClaire Ho ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode); 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 59385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->myConverterArray[CNS_11643] = 594e53684e98c4bb65676a4b8b81a50ce4a0304aedcClaire Ho ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode); 595ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* END android-changed */ 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the function pointers to appropriate funtions */ 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(myConverterData->locale,"cn"); 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 60227f654740f2a26ad62a5c155af9199af9e69b889claireho if 
(version==0){ 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->version = 0; 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0"); 60527f654740f2a26ad62a5c155af9199af9e69b889claireho }else if (version==1){ 60627f654740f2a26ad62a5c155af9199af9e69b889claireho myConverterData->version = 1; 60727f654740f2a26ad62a5c155af9199af9e69b889claireho (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1"); 60827f654740f2a26ad62a5c155af9199af9e69b889claireho }else { 60927f654740f2a26ad62a5c155af9199af9e69b889claireho myConverterData->version = 2; 61027f654740f2a26ad62a5c155af9199af9e69b889claireho (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->isFirstBuffer = TRUE; 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* append the UTF-8 escape sequence */ 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength = 3; 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[0] = 0x1b; 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[1] = 0x25; 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[2] = 0x42; 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->sharedData=(UConverterSharedData*)&_ISO2022Data; 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize the state variables */ 
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(myConverterData->name,"ISO_2022"); 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *errorCode = U_UNSUPPORTED_ERROR; 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar; 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close(cnv); 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *errorCode = U_MEMORY_ALLOCATION_ERROR; 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022Close(UConverter *converter) { 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo); 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData **array = myData->myConverterArray; 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (converter->extraInfo != NULL) { 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*close the array of converter pointers and free the memory*/ 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(array[i]!=NULL) { 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_unloadSharedDataIfReady(array[i]); 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(myData->currentConverter); 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!converter->isExtraLocal){ 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free (converter->extraInfo); 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->extraInfo = NULL; 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) { 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo); 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)); 
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->key = 0; 67285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myConverterData->isEmptySegment = FALSE; 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)); 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->locale[0] == 0){ 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->isFirstBuffer = TRUE; 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->key = 0; 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (converter->mode == UCNV_SO){ 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close (myConverterData->currentConverter); 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter=NULL; 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->mode = UCNV_SI; 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* re-append UTF-8 escape sequence */ 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBufferLength = 3; 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[0] = 0x1b; 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru converter->charErrorBuffer[1] = 0x28; 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converter->charErrorBuffer[2] = 0x42; 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset the state variables */ 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->locale[0] == 'k'){ 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setInitialStateToUnicodeKR(converter, myConverterData); 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setInitialStateFromUnicodeKR(converter, myConverterData); 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO2022getName(const UConverter* cnv){ 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->extraInfo){ 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo; 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return myData->name; 
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** to unicode *******************/ 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**************************************************************************** 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Recognized escape sequences are 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>(B ASCII 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>.A ISO-8859-1 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>.F ISO-8859-7 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>(J JISX-201 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>(I JISX-201 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$B JISX-208 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$@ JISX-208 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$(D JISX-212 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$A GB2312 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <ESC>$(C KSC5601 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 73585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** to unicode *******************/ 74885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 0 1 2 3 4 5 6 7 8 9 */ 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 
751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UCNV_TableStates_2022 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetKey_2022(char c,int32_t* key,int32_t* offset){ 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t togo; 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t low = 0; 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t hi = 
MAX_STATES_2022; 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t oldmid=0; 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru togo = normalize_esq_chars_2022[(uint8_t)c]; 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(togo == 0) { 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not a valid character anywhere in an escape sequence */ 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *key = 0; 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offset = 0; 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return INVALID_2022; 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru togo = (*key << 5) + togo; 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (hi != low) /*binary search*/{ 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t mid = (hi+low) >> 1; /*Finds median*/ 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mid == oldmid) 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (escSeqStateTable_Key_2022[mid] > togo){ 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hi = mid; 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if (escSeqStateTable_Key_2022[mid] < togo){ 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru low = mid; 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
else /*we found it*/{ 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *key = togo; 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offset = mid; 79385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid]; 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru oldmid = mid; 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *key = 0; 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offset = 0; 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return INVALID_2022; 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*runs through a state machine to determine the escape sequence - codepage correspondance 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruchangeState_2022(UConverter* _this, 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char** source, 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* sourceLimit, 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Variant2022 var, 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_TableStates_2022 value; 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo); 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t key = 
myData2022->key; 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offset = 0; 81685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t initialToULength = _this->toULength; 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char c; 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = VALID_NON_TERMINAL_2022; 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (*source < sourceLimit) { 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = *(*source)++; 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->toUBytes[_this->toULength++]=(uint8_t)c; 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = getKey_2022(c,(int32_t *) &key, &offset); 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (value){ 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case VALID_NON_TERMINAL_2022 : 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with the loop */ 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case VALID_TERMINAL_2022: 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru key = 0; 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto DONE; 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case INVALID_2022: 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto DONE; 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case VALID_MAYBE_TERMINAL_2022: 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru#ifdef U_ENABLE_GENERIC_ISO_2022 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ESC ( B is ambiguous only for ISO_2022 itself */ 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(var == ISO_2022) { 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */ 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->toULength = 0; 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */ 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with the loop */ 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = VALID_NON_TERMINAL_2022; 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not ISO_2022 itself, finish here */ 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = VALID_TERMINAL_2022; 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru key = 0; 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto DONE; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruDONE: 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->key = key; 
863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (value == VALID_NON_TERMINAL_2022) { 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* indicate that the escape sequence is incomplete: key!=0 */ 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (value == INVALID_2022 ) { 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* value == VALID_TERMINAL_2022 */ { 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(var){ 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_2022: 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *chosenConverterName = escSeqStateTable_Result_2022[offset]; 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(chosenConverterName == NULL) { 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* SS2 or SS3 */ 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 87885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _this->toUCallbackReason = UCNV_UNASSIGNED; 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->mode = UCNV_SI; 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(myData2022->currentConverter); 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, 
err); 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*err)) { 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->mode = UCNV_SO; 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_2022_JP: 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 89485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset]; 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(tempState) { 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case INVALID_STATE: 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case SS2_STATE: 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.cs[2]!=0) { 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.g<2) { 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.prevG=myData2022->toU2022State.g; 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.g=2; 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* illegal to have SS2 before a matching designator */ 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 
908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case SS3_STATE: not used in ISO-2022-JP-x */ 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_1: 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_7: 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G2 charset for SS2 */ 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[2]=(int8_t)tempState; 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 charset */ 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[0]=(int8_t)tempState; 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 
ISO_2022_CN: 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 93385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(tempState) { 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case INVALID_STATE: 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case SS2_STATE: 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.cs[2]!=0) { 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.g<2) { 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.prevG=myData2022->toU2022State.g; 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.g=2; 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* illegal to have SS2 before a matching designator */ 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case SS3_STATE: 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.cs[3]!=0) { 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->toU2022State.g<2) { 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.prevG=myData2022->toU2022State.g; 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.g=3; 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* illegal to have SS3 before a matching designator */ 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_IR_165: 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->version==0) { 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*fall through*/ 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case GB2312_1: 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*fall through*/ 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case CNS_11643_1: 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[1]=(int8_t)tempState; 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case CNS_11643_2: 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[2]=(int8_t)tempState; 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* other CNS 11643 planes */ 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData2022->version==0) { 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = 
U_UNSUPPORTED_ESCAPE_SEQUENCE; 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData2022->toU2022State.cs[3]=(int8_t)tempState; 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_2022_KR: 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offset==0x30){ 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* nothing to be done, just accept this one escape sequence */ 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_ESCAPE_SEQUENCE; 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*err)) { 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _this->toULength = 0; 100085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { 100185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(_this->toULength>1) { 100285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 
100385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ticket 5691: consistent illegal sequences: 100485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - We include at least the first byte (ESC) in the illegal sequence. 100585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - If any of the non-initial bytes could be the start of a character, 100685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * we stop the illegal sequence before the first one of those. 100785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In escape sequences, all following bytes are "printable", that is, 100885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS), 100985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * they are valid single/lead bytes. 101085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * For simplicity, we always only report the initial ESC byte as the 101185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * illegal sequence and back out all other bytes we looked at. 101285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 101385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Back out some bytes. */ 101485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t backOutDistance=_this->toULength-1; 101585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t bytesFromThisBuffer=_this->toULength-initialToULength; 101685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(backOutDistance<=bytesFromThisBuffer) { 101785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* same as initialToULength<=1 */ 101885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *source-=backOutDistance; 101985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 102085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Back out bytes from the previous buffer: Need to replay them. 
*/ 102185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); 102285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* same as -(initialToULength-1) */ 102385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* preToULength is negative! */ 102485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength); 102585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *source-=bytesFromThisBuffer; 102685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 102785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _this->toULength=1; 102885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 102985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { 103085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _this->toUCallbackReason = UCNV_UNASSIGNED; 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*Checks the characters of the buffer against valid 2022 escape sequences 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*if the match we return a pointer to the initial start of the sequence otherwise 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*we return sourceLimit 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*for 2022 looks ahead in the stream 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *to determine the longest possible convertible 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *data stream 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1042103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline const char* 
1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetEndOfBuffer_2022(const char** source, 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* sourceLimit, 1045103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool /*flush*/){ 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* mySource = *source; 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*source >= sourceLimit) 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return sourceLimit; 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do{ 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*mySource == ESC_2022){ 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t i; 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t key = 0; 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offset; 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022; 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Kludge: I could not 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * figure out the reason for validating an escape sequence 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * twice - once here and once in changeState_2022(). 
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is it possible to have an ESC character in a ISO2022 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * byte stream which is valid in a code page? Is it legal? 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=0; 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022); 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i++) { 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = getKey_2022(*(mySource+i), &key, &offset); 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (value > 0 || *mySource==ESC_2022) 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return mySource; 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return sourceLimit; 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }while (++mySource < sourceLimit); 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return sourceLimit; 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(mySource < sourceLimit && *mySource != ESC_2022) { 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++mySource; 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return mySource; 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 
1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * any future change in _MBCSFromUChar32() function should be reflected here. 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return number of bytes in *value; negative number if fallback; 0 if no mapping 1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1094103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline int32_t 1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruMBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData, 1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, 1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t* value, 1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback, 1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int outputType) 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const int32_t *cx; 1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint16_t *table; 1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t stage2Entry; 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t myValue; 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *p; 1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO(markus): Use and require new, faster MBCS conversion table structures. 
1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Use internal version of ucnv_open() that verifies that the new structures are available, 1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * else U_INTERNAL_PROGRAM_ERROR. 1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru table=sharedData->mbcs.fromUnicodeTable; 1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the bytes and the length for the output */ 1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(outputType==MBCS_OUTPUT_2){ 1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myValue<=0xff) { 1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=1; 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* outputType==MBCS_OUTPUT_3 */ { 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myValue<=0xff) { 
1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=1; 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(myValue<=0xffff) { 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=3; 1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* is this code point assigned, or do we use fallbacks? */ 1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((stage2Entry&(1<<(16+(c&0xf))))!=0) { 1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* assigned */ 1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *value=myValue; 1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return length; 1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) { 1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We allow a 0 byte output if the "assigned" bit is set for this entry. 1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * There is no way with this data structure for fallback output 1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to be a zero byte. 
1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *value=myValue; 1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -length; 1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cx=sharedData->mbcs.extIndexes; 1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cx!=NULL) { 1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ucnv_extSimpleMatchFromU(cx, c, value, useFallback); 1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unassigned */ 1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c 1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * any future change in _MBCSSingleFromUChar32() function should be reflected here. 
1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param retval pointer to output byte 1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return 1 roundtrip byte 0 no mapping -1 fallback byte 1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1165103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline int32_t 1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruMBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData, 1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, 1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t* retval, 1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback) 1170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 1171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint16_t *table; 1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t value; 1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ 1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru table=sharedData->mbcs.fromUnicodeTable; 1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the byte for the output */ 1180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); 1181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* is this code point assigned, 
or do we use fallbacks? */ 1182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *retval=(uint32_t)(value&0xff); 1183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value>=0xf00) { 1184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 1; /* roundtrip */ 1185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(useFallback ? value>=0x800 : value>=0xc00) { 1186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; /* fallback taken */ 1187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* no mapping */ 1189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1192c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru/* 1193c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Check that the result is a 2-byte value with each byte in the range A1..FE 1194c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte 1195c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * to move it to the ISO 2022 range 21..7E. 1196c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Return 0 if out of range. 
1197c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 1198103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline uint32_t 1199c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru_2022FromGR94DBCS(uint32_t value) { 1200c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && 1201c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) 1202c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ) { 1203c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru return value - 0x8080; /* shift down to 21..7e byte range */ 1204c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else { 1205c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru return 0; /* not valid for ISO 2022 */ 1206c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1207c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru} 1208c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 120985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */ 121085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 121185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the 121285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point 121385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * unchanged. 
121485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 1215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline uint32_t 121685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_2022ToGR94DBCS(uint32_t value) { 121785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t returnValue = value + 0x8080; 121885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) && 121985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) { 122085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return returnValue; 122185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 122285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return value; 122385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 122485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 122585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 122685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/********************************************************************************** 1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ISO-2022 Converter 1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruT_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, 1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* mySourceLimit, 
*realSourceLimit; 1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* sourceStart; 1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* myTargetStart; 1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter* saveThis; 1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData; 1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t length; 1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveThis = args->converter; 1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData=((UConverterDataISO2022*)(saveThis->extraInfo)); 1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru realSourceLimit = args->sourceLimit; 1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (args->source < realSourceLimit) { 1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key == 0) { /* are we in the middle of an escape sequence? 
*/ 1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ 1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush); 1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->source < mySourceLimit) { 1255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->currentConverter==NULL) { 1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->currentConverter = ucnv_open("ASCII",err); 1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; 1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveThis->mode = UCNV_SO; 1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert to before the ESC or until the end of the buffer */ 1266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->isFirstBuffer=FALSE; 1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceStart = args->source; 1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myTargetStart = args->target; 1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter = myData->currentConverter; 1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode(args->converter, 1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &args->target, 
1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->targetLimit, 1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &args->source, 1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceLimit, 1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets, 1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UBool)(args->flush && mySourceLimit == realSourceLimit), 1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter = saveThis; 1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*err == U_BUFFER_OVERFLOW_ERROR) { 1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move the overflow buffer */ 1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength; 1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->currentConverter->UCharErrorBufferLength = 0; 1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length > 0) { 1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(saveThis->UCharErrorBuffer, 1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->currentConverter->UCharErrorBuffer, 1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length*U_SIZEOF_UCHAR); 1288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At least one of: 1294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * -Error while 
converting 1295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * -Done with entire buffer 1296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * -Need to write offsets or update the current offset 1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (leave that up to the code in ucnv.c) 1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or else we just stopped at an ESC byte and continue with changeState_2022() 1300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*err) || 1302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (args->source == realSourceLimit) || 1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) || 1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0)) 1305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy partial or error input for truncated detection and error handling */ 1307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)) { 1308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength; 1309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length > 0) { 1310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length); 1311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = saveThis->toULength = 
myData->currentConverter->toULength; 1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length > 0) { 1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length); 1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->source < mySourceLimit) { 1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */ 1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceStart = args->source; 1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru changeState_2022(args->converter, 1328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &(args->source), 1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru realSourceLimit, 1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022, 1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) { 1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* let the ucnv.c code update its current offset */ 1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * To Unicode Callback helper function 1343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerutoUnicodeCallback(UConverter *cnv, 1346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t sourceChar, const uint32_t targetUniChar, 1347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar>0xff){ 1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0] = (uint8_t)(sourceChar>>8); 1350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1] = (uint8_t)sourceChar; 1351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = 2; 1352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 1354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0] =(char) sourceChar; 1355c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru cnv->toULength = 1; 1356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){ 1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 1362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 1363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**************************************ISO-2022-JP*************************************************/ 1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************************************** IMPORTANT ************************************************** 1369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and 1370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32(). 1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* The converter iterates over each Unicode codepoint 1372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is 1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* processed one char at a time it would make sense to reduce the extra processing a canned converter 1374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* would do as far as possible. 1375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 1376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* If the implementation of these macros or structure of sharedData struct change in the future, make 1377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* sure that ISO-2022 is also changed. 
1378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*************************************************************************************************** 1379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 1380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************************************************************************************************** 1382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Rules for ISO-2022-jp encoding 1383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (i) Escape sequences must be fully contained within a line they should not 1384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* span new lines or CRs 1385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (ii) If the last character on a line is represented by two bytes then an ASCII or 1386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* JIS-Roman character escape sequence should follow before the line terminates 1387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (iii) If the first character on the line is represented by two bytes then a two 1388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* byte character escape sequence should precede it 1389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (iv) If no escape sequence is encountered then the characters are ASCII 1390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2, 1391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and invoked with SS2 (ESC N). 
1392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (vi) If there is any G0 designation in text, there must be a switch to 1393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ASCII or to JIS X 0201-Roman before a space character (but not 1394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control 1395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* characters such as tab or CRLF. 1396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (vi) Supported encodings: 1397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7 1398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 1399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* source : RFC-1554 1400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 1401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* JISX201, JISX208,JISX212 : new .cnv data files created 1402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* KSC5601 : alias to ibm-949 mapping table 1403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* GB2312 : alias to ibm-1386 mapping table 1404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ISO-8859-1 : Algorithmic implemented as LATIN1 case 1405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ISO-8859-7 : alisas to ibm-9409 mapping table 1406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 1407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* preference order of JP charsets */ 1409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const StateEnum jpCharsetPref[]={ 1410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ASCII, 1411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX201, 
1412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO8859_1, 1413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO8859_7, 1414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX208, 1415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru JISX212, 1416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru GB2312, 1417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru KSC5601, 1418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru HWKANA_7BIT 1419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The escape sequences must be in order of the enum constants like JISX201 = 3, 1423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not in order of jpCharsetPref[]! 1424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char escSeqChars[][6] ={ 1426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\x1B\x28\x42", /* <ESC>(B ASCII */ 1427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ 1428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */ 1429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\x1B\x28\x4A", /* <ESC>(J JISX-201 */ 1430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\x1B\x24\x42", /* <ESC>$B JISX-208 */ 1431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\x1B\x24\x28\x44", /* <ESC>$(D JISX-212 */ 1432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\x1B\x24\x41", /* <ESC>$A GB2312 */ 1433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\x1B\x24\x28\x43", /* <ESC>$(C KSC5601 */ 1434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
"\x1B\x28\x49" /* <ESC>(I HWKANA_7BIT */ 1435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 143785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const int8_t escSeqCharsLen[] ={ 1438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* length of <ESC>(B ASCII */ 1439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* length of <ESC>.A ISO-8859-1 */ 1440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* length of <ESC>.F ISO-8859-7 */ 1441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* length of <ESC>(J JISX-201 */ 1442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* length of <ESC>$B JISX-208 */ 1443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 4, /* length of <ESC>$(D JISX-212 */ 1444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* length of <ESC>$A GB2312 */ 1445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 4, /* length of <ESC>$(C KSC5601 */ 1446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3 /* length of <ESC>(I HWKANA_7BIT */ 1447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* The iteration over various code pages works this way: 1451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* i) Get the currentState from myConverterData->currentState 1452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ii) Check if the character is mapped to a valid character in the currentState 1453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Yes -> a) set the initIterState to currentState 1454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* b) remain in this state until an invalid character is found 
*      No  ->  a) go to the next code page and find the character
* iii) Before changing the state increment the current state check if the current state
*      is equal to the initIterState
*      Yes ->  A character that cannot be represented in any of the supported encodings
*              break and return a U_INVALID_CHARACTER error
*      No  ->  Continue and find the character in next code page
*
*
* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
*/

/* Map 00..7F to Unicode according to JIS X 0201.
/*
 * jisx201ToU: map a JIS X 0201 (JIS-Roman) byte 00..7F to Unicode.
 *
 * Only two positions differ from ASCII: 5C maps to U+00A5 and 7E maps to
 * U+203E. Every other value is returned unchanged.
 *
 * @param value byte value, expected to be in 00..7F
 * @return the Unicode code point for that byte
 */
static inline uint32_t
jisx201ToU(uint32_t value) {
    if(value < 0x5c) {
        return value;
    } else if(value == 0x5c) {
        return 0xa5;
    } else if(value == 0x7e) {
        return 0x203e;
    } else /* value <= 0x7f */ {
        return value;
    }
}

/*
 * Map Unicode to 00..7F according to JIS X 0201.
 * Return U+FFFE if unmappable.
 *
 * U+005C and U+007E are unmappable because their byte positions are taken
 * by U+00A5 and U+203E respectively.
 *
 * @param value Unicode code point
 * @return the JIS X 0201 byte (00..7F), or 0xfffe if there is no mapping
 */
static inline uint32_t
jisx201FromU(uint32_t value) {
    if(value<=0x7f) {
        if(value!=0x5c && value!=0x7e) {
            return value;
        }
    } else if(value==0xa5) {
        return 0x5c;
    } else if(value==0x203e) {
        return 0x7e;
    }
    return 0xfffe;
}

/*
 * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
 * to JIS X 0208, and convert it to a pair of 21..7E bytes.
 * Return 0 if the byte pair is out of range.
 *
 * @param value Shift-JIS double-byte value, lead byte in bits 15..8 and
 *              trail byte in bits 7..0; must already be a valid pair
 * @return the JIS X 0208 value (first byte in bits 15..8, second byte in
 *         bits 7..0), or 0 if value is above 0xEFFC
 */
static inline uint32_t
_2022FromSJIS(uint32_t value) {
    uint8_t trail;

    if(value > 0xEFFC) {
        return 0; /* beyond JIS X 0208 */
    }

    trail = (uint8_t)value;

    /* Collapse the two lead-byte ranges into one and double it:
     * each Shift-JIS lead byte covers two JIS X 0208 rows. */
    value &= 0xff00; /* lead byte */
    if(value <= 0x9f00) {
        value -= 0x7000;
    } else /* 0xe000 <= value <= 0xef00 */ {
        value -= 0xb000;
    }
    value <<= 1;

    if(trail <= 0x9e) {
        /* lower half of the trail range: the odd (first) row of the pair */
        value -= 0x100;
        if(trail <= 0x7e) {
            value |= trail - 0x1f;
        } else {
            /* trail bytes skip 7F, so the offset is one larger */
            value |= trail - 0x20;
        }
    } else /* trail <= 0xfc */ {
        /* upper half of the trail range: the even (second) row of the pair */
        value |= trail - 0x7e;
    }
    return value;
}

/*
 * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
 * If either byte is outside 21..7E make sure that the result is not valid
 * for Shift-JIS so that the converter catches it.
 * Some invalid byte values already turn into equally invalid Shift-JIS
 * byte values and need not be tested explicitly.
 *
 * @param c1    first JIS X 0208 byte
 * @param c2    second JIS X 0208 byte
 * @param bytes output: bytes[0] receives the Shift-JIS lead byte and
 *              bytes[1] the trail byte; a byte is set to 0 where the input
 *              was detected as invalid
 */
static inline void
_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
    if(c1&1) {
        /* odd first byte: trail byte lands in 40..7E or 80..9E (7F is skipped) */
        ++c1;
        if(c2 <= 0x5f) {
            c2 += 0x1f;
        } else if(c2 <= 0x7e) {
            c2 += 0x20;
        } else {
            c2 = 0; /* invalid */
        }
    } else {
        /* even first byte: trail byte lands in 9F..FC */
        if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) {
            c2 += 0x7e;
        } else {
            c2 = 0; /* invalid */
        }
    }
    /* two JIS X 0208 first-byte values share one Shift-JIS lead byte */
    c1 >>= 1;
    if(c1 <= 0x2f) {
        c1 += 0x70;
    } else if(c1 <= 0x3f) {
        c1 += 0xb0;
    } else {
        c1 = 0; /* invalid */
    }
    bytes[0] = (char)c1;
    bytes[1] = (char)c2;
}

/*
 * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
 * Katakana.
 * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
 * because Shift-JIS roundtrips half-width Katakana to single bytes.
 * These were the only fallbacks in ICU's jisx-208.ucm file.
 */
1574c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 1575c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Querustatic const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = { 1576c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2123, /* U+FF61 */ 1577c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2156, 1578c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2157, 1579c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2122, 1580c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2126, 1581c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2572, 1582c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2521, 1583c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2523, 1584c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2525, 1585c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2527, 1586c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2529, 1587c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2563, 1588c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2565, 1589c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2567, 1590c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2543, 1591c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x213C, /* U+FF70 */ 1592c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2522, 1593c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2524, 1594c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2526, 1595c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2528, 1596c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x252A, 1597c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x252B, 1598c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x252D, 1599c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x252F, 
1600c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2531, 1601c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2533, 1602c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2535, 1603c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2537, 1604c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2539, 1605c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x253B, 1606c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x253D, 1607c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x253F, /* U+FF80 */ 1608c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2541, 1609c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2544, 1610c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2546, 1611c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2548, 1612c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254A, 1613c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254B, 1614c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254C, 1615c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254D, 1616c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254E, 1617c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x254F, 1618c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2552, 1619c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2555, 1620c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2558, 1621c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x255B, 1622c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x255E, 1623c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x255F, /* U+FF90 */ 1624c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2560, 1625c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2561, 1626c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2562, 
1627c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2564, 1628c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2566, 1629c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2568, 1630c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2569, 1631c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256A, 1632c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256B, 1633c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256C, 1634c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256D, 1635c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x256F, 1636c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x2573, 1637c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x212B, 1638c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 0x212C /* U+FF9F */ 1639c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru}; 1640c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 1641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) { 1643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 1644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *converterData; 1645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pFromU2022State; 1646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target = (uint8_t *) args->target; 1647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; 1648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* source = args->source; 1649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* sourceLimit = args->sourceLimit; 
1650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t* offsets = args->offsets; 1651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 sourceChar; 1652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buffer[8]; 1653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len, outLen; 1654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t choices[10]; 1655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t choiceCount; 1656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetValue = 0; 1657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback; 1658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 1660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs, g; 1661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the state */ 1663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData = (UConverterDataISO2022*)cnv->extraInfo; 1664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State = &converterData->fromU2022State; 1665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 1667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check if the last codepoint of previous buffer was a lead surrogate*/ 1669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { 1670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 1671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source < sourceLimit) { 1674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < targetLimit) { 1675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar = *(source++); 1677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*check if the char is a First surrogate*/ 1678103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE(sourceChar)) { 1679103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE_LEAD(sourceChar)) { 1680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 1681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*look ahead to find the trail surrogate*/ 1682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source < sourceLimit) { 1683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 1684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=(UChar) *source; 1685103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_TRAIL(trail)) { 1686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source++; 1687103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); 1688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=0x00; 1689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this supplementary code point */ 1690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 1691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 1693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*err=U_ILLEGAL_CHAR_FOUND; 1695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 1700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 1705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 1707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not convert SO/SI/ESC */ 1713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_2022_CONTROL(sourceChar)) { 1714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 1715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 1716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
1719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do the conversion */ 1721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choiceCount == 0) { 1723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t csm; 1724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The csm variable keeps track of which charsets are allowed 1727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and not used yet while building the choices[]. 1728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm = jpCharsetMasks[converterData->version]; 1730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 1731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS7/8: try single-byte half-width Katakana before JISX208 */ 1733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(converterData->version == 3 || converterData->version == 4) { 1734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[choiceCount++] = (int8_t)HWKANA_7BIT; 1735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Do not try single-byte half-width Katakana for other versions. 
*/ 1737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm &= ~CSM(HWKANA_7BIT); 1738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try the current G0 charset */ 1740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[choiceCount++] = cs = pFromU2022State->cs[0]; 1741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm &= ~CSM(cs); 1742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try the current G2 charset */ 1744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((cs = pFromU2022State->cs[2]) != 0) { 1745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[choiceCount++] = cs; 1746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm &= ~CSM(cs); 1747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try all the other possible charsets */ 1750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) { 1751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (int8_t)jpCharsetPref[i]; 1752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(CSM(cs) & csm) { 1753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[choiceCount++] = cs; 1754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru csm &= ~CSM(cs); 1755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = g = 0; 1760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 
1761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len==0: no mapping found yet 1762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks 1763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len>0: found a roundtrip result, done 1764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 0; 1766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We will turn off useFallback after finding a fallback, 1768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but we still get fallbacks from PUA code points as usual. 1769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Therefore, we will also need to check that we don't overwrite 1770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * an early fallback with a later one. 
1771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = cnv->useFallback; 1773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < choiceCount && len <= 0; ++i) { 1775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 1776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len2; 1777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs0 = choices[i]; 1778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(cs0) { 1779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ASCII: 1780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar <= 0x7f) { 1781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = (uint32_t)sourceChar; 1782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 1783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 0; 1785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_1: 1788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(GR96_START <= sourceChar && sourceChar <= GR96_END) { 1789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = (uint32_t)sourceChar - 0x80; 1790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 1791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 2; 1793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 
HWKANA_7BIT: 1796c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { 1797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(converterData->version==3) { 1798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS7: use G1 (SO) */ 1799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Shift U+FF61..U+FF9F to bytes 21..5F. */ 1800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); 1801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 1802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ 1803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 1; 1804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(converterData->version==4) { 1805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */ 1806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Shift U+FF61..U+FF9F to bytes A1..DF. 
*/ 1807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1)); 1808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 1809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = pFromU2022State->cs[0]; 1811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_JP_DBCS(cs)) { 1812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* switch from a DBCS charset to JISX201 */ 1813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (int8_t)JISX201; 1814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* else stay in the current G0 charset */ 1816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 0; 1817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* else do not use HWKANA_7BIT with other versions */ 1819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case JISX201: 1822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 SBCS */ 1823c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru value = jisx201FromU(sourceChar); 1824c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(value <= 0x7f) { 1825c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru targetValue = value; 1826c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru len = 1; 1827c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru cs = cs0; 1828c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru g = 0; 1829c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru useFallback = FALSE; 1830c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 
1831c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1832c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case JISX208: 1833c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* G0 DBCS from Shift-JIS table */ 1834c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru len2 = MBCS_FROM_UCHAR32_ISO2022( 1835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[cs0], 1836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, &value, 1837c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru useFallback, MBCS_OUTPUT_2); 1838c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1839c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru value = _2022FromSJIS(value); 1840c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(value != 0) { 1841c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru targetValue = value; 1842c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru len = len2; 1843c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru cs = cs0; 1844c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru g = 0; 1845c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru useFallback = FALSE; 1846c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1847c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if(len == 0 && useFallback && 1848c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { 1849c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru targetValue = hwkana_fb[sourceChar - HWKANA_START]; 1850c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru len = -2; 1851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 0; 
1853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 1854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_7: 1857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 SBCS forced to 7-bit output */ 1858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len2 = MBCS_SINGLE_FROM_UCHAR32( 1859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[cs0], 1860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, &value, 1861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback); 1862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) { 1863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = value - 0x80; 1864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = len2; 1865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 2; 1867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 1868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 DBCS */ 1872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len2 = MBCS_FROM_UCHAR32_ISO2022( 1873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[cs0], 1874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, &value, 1875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, MBCS_OUTPUT_2); 
1876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 1877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs0 == KSC5601) { 1878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Check for valid bytes for the encoding scheme. 1880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is necessary because the sub-converter (windows-949) 1881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * has a broader encoding scheme than is valid for 2022. 1882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1883c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru value = _2022FromGR94DBCS(value); 1884c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(value == 0) { 1885c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = value; 1889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = len2; 1890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = cs0; 1891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 0; 1892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 1893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len != 0) { 1899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len < 0) { 
1900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = -len; /* fallback */ 1901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen = 0; /* count output bytes */ 1903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write SI if necessary (only for JIS7) */ 1905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g == 1 && g == 0) { 1906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = UCNV_SI; 1907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 1908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the designation sequence if necessary */ 1911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs != pFromU2022State->cs[g]) { 1912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t escLen = escSeqCharsLen[cs]; 1913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen); 1914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen += escLen; 1915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[g] = cs; 1916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* invalidate the choices[] */ 1918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 1919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the shift sequence if necessary */ 1922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(g != pFromU2022State->g) 
{ 1923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(g) { 1924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* case 0 handled before writing escapes */ 1925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 1926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = UCNV_SO; 1927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 1; 1928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: /* case 2 */ 1930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = 0x1b; 1931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = 0x4e; 1932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no case 3: no SS3 in ISO-2022-JP-x */ 1934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the output bytes */ 1938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len == 1) { 1939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = (char)targetValue; 1940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* len == 2 */ { 1941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = (char)(targetValue >> 8); 1942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = (char)targetValue; 1943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if we cannot find the character 
after checking all codepages 1947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * then this is an error 1948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 1950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 1951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar == CR || sourceChar == LF) { 1955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */ 1956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[2] = 0; 1957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 1958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output outLen>0 bytes in buffer[] */ 1961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(outLen == 1) { 1962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[0]; 1963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) { 1964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */ 1965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(outLen == 2 && (target + 2) <= targetLimit) { 1967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[0]; 1968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[1]; 
1969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) { 1970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)); 1971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = sourceIndex; 1972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = sourceIndex; 1973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 1976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 1977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, outLen, 1978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 1979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)), 1980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 1981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)) { 1982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* end if(myTargetIndex<myTargetLength) */ 1986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 1987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 1988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }/* end while(mySourceIndex<mySourceLength) */ 1992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the end of the input stream and detection of truncated input 1995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are handled by the framework, but for ISO-2022-JP conversion 1996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to be in ASCII mode at the very end 1997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conditions: 1999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * successful 2000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in SO mode or not in ASCII mode 2001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of input and no truncated input 2002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U_SUCCESS(*err) && 2004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) && 2005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->flush && source>=sourceLimit && cnv->fromUChar32==0 2006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex; 2008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen = 0; 2010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g != 0) { 2012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[outLen++] = UCNV_SI; 2013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 2014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
2016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->cs[0] != ASCII) { 2017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t escLen = escSeqCharsLen[ASCII]; 2018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen); 2019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen += escLen; 2020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[0] = (int8_t)ASCII; 2021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the source index of the last input character */ 2024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO this would be simpler and more reliable if we used a pair 2026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of sourceIndex/prevSourceIndex like in ucnvmbcs.c 2027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so that we could simply use the prevSourceIndex here; 2028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this code gives an incorrect result for the rare case of an unmatched 2029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trail surrogate that is alone in the last buffer of the text stream 2030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=(int32_t)(source-args->source); 2032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceIndex>0) { 2033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 2034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U16_IS_TRAIL(args->source[sourceIndex]) && 2035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (sourceIndex==0 || 
U16_IS_LEAD(args->source[sourceIndex-1])) 2036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 2038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=-1; 2041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 2044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 2045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, outLen, 2046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 2047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 2048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 2049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*save the state and return */ 2052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = source; 2053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = (char*)target; 2054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** to unicode *******************/ 2057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, 2060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
UErrorCode* err){ 2061c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru char tempBuf[2]; 2062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySource = (char *) args->source; 2063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *myTarget = args->target; 2064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySourceLimit = args->sourceLimit; 2065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetUniChar = 0x0000; 2066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t mySourceChar = 0x0000; 206785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t tmpSourceChar = 0x0000; 2068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData; 2069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pToU2022State; 2070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru StateEnum cs; 2071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData=(UConverterDataISO2022*)(args->converter->extraInfo); 2073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State = &myData->toU2022State; 2074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key != 0) { 2076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial escape sequence */ 2077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto escape; 2078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { 2079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial double-byte character */ 2080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar = args->converter->toUBytes[0]; 
2081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 0; 2082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; 208385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = missingCharMarker; 2084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailByte; 2085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(mySource < mySourceLimit){ 2088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar =missingCharMarker; 2090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget < args->targetLimit){ 2092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar= (unsigned char) *mySource++; 2094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(mySourceChar) { 2096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UCNV_SI: 2097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->version==3) { 2098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=0; 2099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ 210285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* reset this, we have a different error */ 2103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 
2104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UCNV_SO: 2107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->version==3) { 2108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS7: switch to G1 half-width Katakana */ 2109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->cs[1] = (int8_t)HWKANA_7BIT; 2110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=1; 2111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ 211485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* reset this, we have a different error */ 2115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ESC_2022: 2119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySource--; 2120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruescape: 212185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho { 212285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char * mySourceBefore = mySource; 212385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t toULengthBefore = args->converter->toULength; 212485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 212585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho changeState_2022(args->converter,&(mySource), 212685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceLimit, ISO_2022_JP,err); 212785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 212885bf2e2fbc60a9f938064abc8127d61da7d19882Claire 
Ho /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */ 212985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { 213085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *err = U_ILLEGAL_ESCAPE_SEQUENCE; 213185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUCallbackReason = UCNV_IRREGULAR; 213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); 213385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 213485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 2135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* invalid or illegal escape sequence */ 2137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 2138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 2139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 214085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ 2141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 214385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* If we successfully completed an escape sequence, we begin a new segment, empty so far */ 214485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(myData->key==0) { 214585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = TRUE; 214685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 2147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-JP does not use
single-byte (C1) SS2 and SS3 */ 2150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case CR: 2152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*falls through*/ 2153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case LF: 2154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* automatically reset to single-byte mode */ 2155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) { 2156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->cs[0] = (int8_t)ASCII; 2157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->cs[2] = 0; 2159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g = 0; 2160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* falls through */ 2161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 2162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert one or two bytes */ 216385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; 2164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; 2165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 && 2166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !IS_JP_DBCS(cs) 2167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */ 2169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = mySourceChar + (HWKANA_START - 0xa1); 2170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
2171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return from a single-shift state to the previous one */ 2172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pToU2022State->g >= 2) { 2173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=pToU2022State->prevG; 2174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else switch(cs) { 2176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ASCII: 2177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 2178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = mySourceChar; 2179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_1: 2182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 2183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = mySourceChar + 0x80; 2184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return from a single-shift state to the previous one */ 2186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=pToU2022State->prevG; 2187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO8859_7: 2189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 2190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert mySourceChar+0x80 to use a normal 8-bit table */ 2191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = 2192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( 
2193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->myConverterArray[cs], 2194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar + 0x80); 2195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return from a single-shift state to the previous one */ 2197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=pToU2022State->prevG; 2198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case JISX201: 2200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 2201c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru targetUniChar = jisx201ToU(mySourceChar); 2202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case HWKANA_7BIT: 2205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { 2206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 7-bit halfwidth Katakana */ 2207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = mySourceChar + (HWKANA_START - 0x21); 2208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 2211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* G0 DBCS */ 2212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySource < mySourceLimit) { 221385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int leadIsOk, trailIsOk; 221485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint8_t trailByte; 2215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailByte: 
221685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailByte = (uint8_t)*mySource; 221785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 221885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ticket 5691: consistent illegal sequences: 221985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - We include at least the first byte in the illegal sequence. 222085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - If any of the non-initial bytes could be the start of a character, 222185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * we stop the illegal sequence before the first one of those. 222285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 222385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is 222485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * an ESC/SO/SI, we report only the first byte as the illegal sequence. 222585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Otherwise we convert or report the pair of bytes. 
222685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 222785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); 222885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); 222985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (leadIsOk && trailIsOk) { 223085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 223185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tmpSourceChar = (mySourceChar << 8) | trailByte; 223285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(cs == JISX208) { 223385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf); 223485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = tmpSourceChar; 223585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 223685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. 
*/ 223785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = tmpSourceChar; 223885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (cs == KSC5601) { 223985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ 224085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 224185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[0] = (char)(tmpSourceChar >> 8); 224285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[1] = (char)(tmpSourceChar); 224385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 224485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); 224585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 224685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 224785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 224885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* add another bit so that the code below writes 2 bytes in case of error */ 224985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; 2250c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 2251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toUBytes[0] = (uint8_t)mySourceChar; 2253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 1; 2254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 2255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* End of inner switch */ 2257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* 
End of outer switch */ 2259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ 2260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 2261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 2262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++)=(UChar)targetUniChar; 2264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if(targetUniChar > missingCharMarker){ 2266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* disassemble the surrogate pair and write to output*/ 2267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar-=0x0010000; 2268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10)); 2269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 2270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 2271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++myTarget; 2273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget< args->targetLimit){ 2274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); 2275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 2276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 
1 : 2)); 2277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++myTarget; 2279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= 2281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); 2282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 2286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Call the callback function*/ 2287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); 2288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */ 2292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 2293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 2297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 2298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 2299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
2301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************************************************************** 2303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Rules for ISO-2022-KR encoding 2304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* i) The KSC5601 designator sequence should appear only once in a file, 2305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* at the begining of a line before any KSC5601 characters. This usually 2306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* means that it appears by itself on the first line of the file 2307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ii) There are only 2 shifting sequences SO to shift into double byte mode 2308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and SI to shift into single byte mode 2309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 2310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter* saveConv = args->converter; 2314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo; 2315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter=myConverterData->currentConverter; 2316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32; 2318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_MBCSFromUnicodeWithOffsets(args,err); 
2319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32; 2320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*err == U_BUFFER_OVERFLOW_ERROR) { 2322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->currentConverter->charErrorBufferLength > 0) { 2323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy( 2324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveConv->charErrorBuffer, 2325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBuffer, 2326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBufferLength); 2327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength; 2329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBufferLength = 0; 2330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter=saveConv; 2332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source = args->source; 2338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *sourceLimit = args->sourceLimit; 
2339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned char *target = (unsigned char *) args->target; 2340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned char *targetLimit = (unsigned char *) args->targetLimit; 2341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t* offsets = args->offsets; 2342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetByteUnit = 0x0000; 2343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 sourceChar = 0x0000; 2344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isTargetByteDBCS; 2345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool oldIsTargetByteDBCS; 2346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *converterData; 2347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData* sharedData; 2348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback; 2349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length =0; 2350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData=(UConverterDataISO2022*)args->converter->extraInfo; 2352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* if the version is 1 then the user is requesting 2353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conversion with ibm-25546 pass the arguments to 2354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * MBCS converter and return 2355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(converterData->version==1){ 2357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err); 2358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } 2360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize data */ 2362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sharedData = converterData->currentConverter->sharedData; 2363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = args->converter->useFallback; 2364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus; 2365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru oldIsTargetByteDBCS = isTargetByteDBCS; 2366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus; 2368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) { 2369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 2370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source < sourceLimit){ 2372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetByteUnit = missingCharMarker; 2374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < (unsigned char*) args->targetLimit){ 2376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar = *source++; 2377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not convert SO/SI/ESC */ 2379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_2022_CONTROL(sourceChar)) { 2380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru *err=U_ILLEGAL_CHAR_FOUND; 2382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUChar32=sourceChar; 2383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2); 2387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length < 0) { 2388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = -length; /* fallback */ 2389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only DBCS or SBCS characters are expected*/ 2391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* DB characters with high bit set to 1 are expected */ 239285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( length > 2 || length==0 || 239385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (length == 1 && targetByteUnit > 0x7f) || 239485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (length == 2 && 239585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) || 239685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1))) 239785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ) { 2398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetByteUnit=missingCharMarker; 2399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (targetByteUnit != missingCharMarker){ 2401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru oldIsTargetByteDBCS = isTargetByteDBCS; 2403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF); 2404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* append the shift sequence */ 2405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (oldIsTargetByteDBCS != isTargetByteDBCS ){ 2406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (isTargetByteDBCS) 2408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = UCNV_SO; 2409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 2410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = UCNV_SI; 2411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) 2412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(offsets++) = (int32_t)(source - args->source-1); 2413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the targetUniChar to target */ 2415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetByteUnit <= 0x00FF){ 2416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( target < targetLimit){ 2417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(target++) = (unsigned char) targetByteUnit; 2418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets){ 2419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(offsets++) = (int32_t)(source - args->source-1); 2420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit); 2424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 
2425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < targetLimit){ 2428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80); 2429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets){ 2430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(offsets++) = (int32_t)(source - args->source-1); 2431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < targetLimit){ 2433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(target++) =(unsigned char) (targetByteUnit -0x80); 2434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets){ 2435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(offsets++) = (int32_t)(source - args->source-1); 2436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80); 2439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 2440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 2442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80); 2443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80); 2444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 
2445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 2450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* oops.. the code point is unassingned 2451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * set the error and reason 2452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*check if the char is a First surrogate*/ 2455103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE(sourceChar)) { 2456103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE_LEAD(sourceChar)) { 2457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 2458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*look ahead to find the trail surrogate*/ 2459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source < sourceLimit) { 2460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 2461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=(UChar) *source; 2462103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_TRAIL(trail)) { 2463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source++; 2464103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); 2465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 2466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this surrogate code point */ 2467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 
2468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 2470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 2472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 2475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ZERO_ERROR; 2476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 2479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 2481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(unassigned) for a BMP code point */ 2484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 2485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUChar32=sourceChar; 2488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* end if(myTargetIndex<myTargetLength) */ 2491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 
2492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 2493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }/* end while(mySourceIndex<mySourceLength) */ 2497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the end of the input stream and detection of truncated input 2500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are handled by the framework, but for ISO-2022-KR conversion 2501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to be in ASCII mode at the very end 2502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conditions: 2504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * successful 2505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not in ASCII mode 2506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of input and no truncated input 2507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U_SUCCESS(*err) && 2509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isTargetByteDBCS && 2510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->flush && source>=sourceLimit && args->converter->fromUChar32==0 2511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex; 2513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we are switching to ASCII */ 
2515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isTargetByteDBCS=FALSE; 2516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the source index of the last input character */ 2518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO this would be simpler and more reliable if we used a pair 2520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of sourceIndex/prevSourceIndex like in ucnvmbcs.c 2521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so that we could simply use the prevSourceIndex here; 2522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this code gives an incorrect result for the rare case of an unmatched 2523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trail surrogate that is alone in the last buffer of the text stream 2524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=(int32_t)(source-args->source); 2526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceIndex>0) { 2527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 2528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U16_IS_TRAIL(args->source[sourceIndex]) && 2529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) 2530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 2531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 2532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=-1; 2535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
2536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 2538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter, 2539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SHIFT_IN_STR, 1, 2540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 2541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 2542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 2543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*save the state and return */ 2546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = source; 2547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = (char*)target; 2548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS; 2549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************************ To Unicode ***************************************/ 2552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args, 2555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 2556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char const* sourceStart; 2557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo); 
2558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUnicodeArgs subArgs; 2560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t minArgsSize; 2561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the subconverter arguments */ 2563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->size<sizeof(UConverterToUnicodeArgs)) { 2564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minArgsSize = args->size; 2565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); 2567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(&subArgs, args, minArgsSize); 2570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.size = (uint16_t)minArgsSize; 2571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.converter = myData->currentConverter; 2572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remember the original start of the input for offsets */ 2574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceStart = args->source; 2575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key != 0) { 2577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial escape sequence */ 2578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto escape; 2579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
2581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(U_SUCCESS(*err) && args->source < args->sourceLimit) { 2582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ 2583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.source = args->source; 2584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush); 2585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subArgs.source != subArgs.sourceLimit) { 2586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * get the current partial byte sequence 2588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it needs to be moved between the public and the subconverter 2590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so that the conversion framework, which only sees the public 2591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * converter, can handle truncated and illegal input etc. 
2592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->converter->toULength > 0) { 2594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength); 2595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.converter->toULength = args->converter->toULength; 2597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 2599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Convert up to the end of the input, or to before the next escape character. 2600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Does not handle conversion extensions because the preToU[] state etc. 2601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is not copied. 2602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 2603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); 2604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets != NULL && sourceStart != args->source) { 2606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* update offsets to base them on the actual start of the input */ 2607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets = args->offsets; 2608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target = args->target; 2609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t delta = (int32_t)(args->source - sourceStart); 2610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(target < subArgs.target) { 2611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*offsets >= 0) { 
2612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets += delta; 2613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++offsets; 2615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++target; 2616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = subArgs.source; 2619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = subArgs.target; 2620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets = subArgs.offsets; 2621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy input/error/overflow buffers */ 2623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subArgs.converter->toULength > 0) { 2624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength); 2625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = subArgs.converter->toULength; 2627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*err == U_BUFFER_OVERFLOW_ERROR) { 2629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subArgs.converter->UCharErrorBufferLength > 0) { 2630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, 2631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.converter->UCharErrorBufferLength); 2632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; 2634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subArgs.converter->UCharErrorBufferLength = 0; 2635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*err) || (args->source == args->sourceLimit)) { 2639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruescape: 2643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru changeState_2022(args->converter, 2644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &(args->source), 2645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->sourceLimit, 2646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO_2022_KR, 2647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 2648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, 2653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 2654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char tempBuf[2]; 2655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySource = ( char *) args->source; 2656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *myTarget = args->target; 
2657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySourceLimit = args->sourceLimit; 2658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 targetUniChar = 0x0000; 2659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar mySourceChar = 0x0000; 2660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData; 2661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData* sharedData ; 2662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback; 2663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData=(UConverterDataISO2022*)(args->converter->extraInfo); 2665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->version==1){ 2666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err); 2667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize state */ 2671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sharedData = myData->currentConverter->sharedData; 2672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = args->converter->useFallback; 2673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key != 0) { 2675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial escape sequence */ 2676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto escape; 2677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { 
2678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial double-byte character */ 2679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar = args->converter->toUBytes[0]; 2680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 0; 2681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailByte; 2682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(mySource< mySourceLimit){ 2685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget < args->targetLimit){ 2687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar= (unsigned char) *mySource++; 2689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar==UCNV_SI){ 2691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->toU2022State.g = 0; 269285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (myData->isEmptySegment) { 269385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ 269485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *err = U_ILLEGAL_ESCAPE_SEQUENCE; 269585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUCallbackReason = UCNV_IRREGULAR; 269685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUBytes[0] = (uint8_t)mySourceChar; 269785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toULength = 1; 269885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->target = myTarget; 269985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->source = mySource; 
270085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 270185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 2702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*consume the source */ 2703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else if(mySourceChar==UCNV_SO){ 2705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData->toU2022State.g = 1; 270685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ 2707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*consume the source */ 2708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else if(mySourceChar==ESC_2022){ 2710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySource--; 2711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruescape: 271285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */ 2713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru changeState_2022(args->converter,&(mySource), 2714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceLimit, ISO_2022_KR, err); 2715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 2716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 2717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 2718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 2719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 2721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
272385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */ 2724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->toU2022State.g == 1) { 2725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySource < mySourceLimit) { 272685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int leadIsOk, trailIsOk; 272785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint8_t trailByte; 2728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailByte: 272985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = missingCharMarker; 273085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailByte = (uint8_t)*mySource; 273185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 273285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ticket 5691: consistent illegal sequences: 273385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - We include at least the first byte in the illegal sequence. 273485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - If any of the non-initial bytes could be the start of a character, 273585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * we stop the illegal sequence before the first one of those. 273685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 273785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is 273885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * an ESC/SO/SI, we report only the first byte as the illegal sequence. 273985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Otherwise we convert or report the pair of bytes. 
274085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 274185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); 274285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); 274385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (leadIsOk && trailIsOk) { 274485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 274585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[0] = (char)(mySourceChar + 0x80); 274685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[1] = (char)(trailByte + 0x80); 2747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback); 274885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = (mySourceChar << 8) | trailByte; 274985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 275085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 275185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 275285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* add another bit so that the code below writes 2 bytes in case of error */ 275385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; 2754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toUBytes[0] = (uint8_t)mySourceChar; 2757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 1; 2758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
276185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho else if(mySourceChar <= 0x7f) { 2762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback); 276385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 276485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = 0xffff; 2765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetUniChar < 0xfffe){ 2767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets) { 2768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 2769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++)=(UChar)targetUniChar; 2771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 2773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Call the callback function*/ 2774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); 2775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 2779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 2780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 
2784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 2785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 2786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************************** END ISO2022-KR *********************************/ 2788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************************** ISO-2022-CN ********************************* 2790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Rules for ISO-2022-CN Encoding: 2792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* i) The designator sequence must appear once on a line before any instance 2793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* of character set it designates. 2794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ii) If two lines contain characters from the same character set, both lines 2795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* must include the designator sequence. 2796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* iii) Once the designator sequence is known, a shifting sequence has to be found 2797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* to invoke the shifting 2798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* iv) All lines start in ASCII and end in ASCII. 
2799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* v) Four shifting sequences are employed for this purpose: 2800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Sequcence ASCII Eq Charsets 2802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ---------- ------- --------- 2803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SI <SI> US-ASCII 2804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165 2805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SS2 <ESC>N CNS-11643-1992 Plane 2 2806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SS3 <ESC>O CNS-11643-1992 Planes 3-7 2807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* vi) 2809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SOdesignator : ESC "$" ")" finalchar_for_SO 2810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SS2designator : ESC "$" "*" finalchar_for_SS2 2811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SS3designator : ESC "$" "+" finalchar_for_SS3 2812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ ) A Indicates the bytes following SO are Chinese 2814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* characters as defined in GB 2312-80, until 2815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* another SOdesignation appears 2816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ ) E Indicates the bytes following SO are as defined 2819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* in ISO-IR-165 (for details, see section 2.1), 
2820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* until another SOdesignation appears 2821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ ) G Indicates the bytes following SO are as defined 2823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* in CNS 11643-plane-1, until another 2824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SOdesignation appears 2825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ * H Indicates the two bytes immediately following 2827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* SS2 is a Chinese character as defined in CNS 2828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11643-plane-2, until another SS2designation 2829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appears 2830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (Meaning <ESC>N must preceed every 2 byte 2831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* sequence.) 2832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ + I Indicates the immediate two bytes following SS3 2834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* is a Chinese character as defined in CNS 2835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11643-plane-3, until another SS3designation 2836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appears 2837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (Meaning <ESC>O must preceed every 2 byte 2838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* sequence.) 
2839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ + J Indicates the immediate two bytes following SS3 2841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* is a Chinese character as defined in CNS 2842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11643-plane-4, until another SS3designation 2843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appears 2844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* (In English: <ESC>O must preceed every 2 byte 2845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* sequence.) 2846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ + K Indicates the immediate two bytes following SS3 2848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* is a Chinese character as defined in CNS 2849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11643-plane-5, until another SS3designation 2850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appears 2851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ + L Indicates the immediate two bytes following SS3 2853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* is a Chinese character as defined in CNS 2854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11643-plane-6, until another SS3designation 2855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appears 2856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* ESC $ + M Indicates the immediate two bytes following SS3 2858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* is a Chinese character as defined in CNS 2859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11643-plane-7, until another SS3designation 
2860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appears 2861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and 2863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* has its own designation information before any Chinese characters 2864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* appear 2865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 2866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 2867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2868103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/* The following are defined this way to make the strings truly readonly */ 2869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; 2870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; 2871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; 2872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; 2873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; 2874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; 2875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; 2876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; 2877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; 
2878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/********************** ISO2022-CN Data **************************/ 2880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char* const escSeqCharsCN[10] ={ 2881103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius SHIFT_IN_STR, /* 0 ASCII */ 2882103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius GB_2312_80_STR, /* 1 GB2312_1 */ 2883103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ISO_IR_165_STR, /* 2 ISO_IR_165 */ 2884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_1_STR, 2885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_2_STR, 2886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_3_STR, 2887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_4_STR, 2888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_5_STR, 2889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_6_STR, 2890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CNS_11643_1992_Plane_7_STR 2891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 2892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 2894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){ 2895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 2896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *converterData; 2897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pFromU2022State; 2898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target = (uint8_t *) args->target; 
2899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; 2900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* source = args->source; 2901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar* sourceLimit = args->sourceLimit; 2902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t* offsets = args->offsets; 2903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 sourceChar; 2904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buffer[8]; 2905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len; 2906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t choices[3]; 2907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t choiceCount; 2908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetValue = 0; 2909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback; 2910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the state */ 2912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData = (UConverterDataISO2022*)cnv->extraInfo; 2913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State = &converterData->fromU2022State; 2914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 2916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check if the last codepoint of previous buffer was a lead surrogate*/ 2918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { 2919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrail; 2920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
} 2921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while( source < sourceLimit){ 2923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target < targetLimit){ 2924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar = *(source++); 2926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*check if the char is a First surrogate*/ 2927103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE(sourceChar)) { 2928103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE_LEAD(sourceChar)) { 2929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrail: 2930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*look ahead to find the trail surrogate*/ 2931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source < sourceLimit) { 2932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test the following code unit */ 2933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail=(UChar) *source; 2934103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_TRAIL(trail)) { 2935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source++; 2936103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); 2937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=0x00; 2938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert this supplementary code point */ 2939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 2940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched lead code unit (1st surrogate) */ 2942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 
2943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 2944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 2945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 2949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 2950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unmatched trail code unit (2nd surrogate) */ 2954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 2956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 2957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do the conversion */ 2962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar <= 0x007f ){ 2963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not convert SO/SI/ESC */ 2964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(IS_2022_CONTROL(sourceChar)) { 2965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 2966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 
2967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 2968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 2969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* US-ASCII */ 2972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g == 0) { 2973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[0] = (char)sourceChar; 2974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 1; 2975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 2976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[0] = UCNV_SI; 2977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[1] = (char)sourceChar; 2978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 2; 2979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 2980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 2981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceChar == CR || sourceChar == LF) { 2983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset the state at the end of a line */ 2984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(pFromU2022State, 0, sizeof(ISO2022State)); 2985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 2986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 2988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 2989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert U+0080..U+10ffff */ 2990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 
2991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs, g; 2992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choiceCount == 0) { 2994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try the current SO/G1 converter first */ 2995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[0] = pFromU2022State->cs[1]; 2996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* default to GB2312_1 if none is designated yet */ 2998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choices[0] == 0) { 2999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[0] = GB2312_1; 3000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(converterData->version == 0) { 3003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-CN */ 3004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */ 3006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choices[0] == GB2312_1) { 3007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)CNS_11643_1; 3008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)GB2312_1; 3010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 2; 301327f654740f2a26ad62a5c155af9199af9e69b889claireho } else if (converterData->version == 1) { 
3014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-CN-EXT */ 3015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try one of the other converters */ 3017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(choices[0]) { 3018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case GB2312_1: 3019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)CNS_11643_1; 3020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[2] = (int8_t)ISO_IR_165; 3021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ISO_IR_165: 3023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)GB2312_1; 3024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[2] = (int8_t)CNS_11643_1; 3025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: /* CNS_11643_x */ 3027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[1] = (int8_t)GB2312_1; 3028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choices[2] = (int8_t)ISO_IR_165; 3029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 3; 303327f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 303427f654740f2a26ad62a5c155af9199af9e69b889claireho choices[0] = (int8_t)CNS_11643_1; 303527f654740f2a26ad62a5c155af9199af9e69b889claireho choices[1] = (int8_t)GB2312_1; 3036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
3039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = g = 0; 3040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len==0: no mapping found yet 3042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks 3043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * len>0: found a roundtrip result, done 3044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 0; 3046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We will turn off useFallback after finding a fallback, 3048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * but we still get fallbacks from PUA code points as usual. 3049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Therefore, we will also need to check that we don't overwrite 3050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * an early fallback with a later one. 
3051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = cnv->useFallback; 3053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; i < choiceCount && len <= 0; ++i) { 3055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs0 = choices[i]; 3056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs0 > 0) { 3057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 3058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len2; 305985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(cs0 >= CNS_11643_0) { 3060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len2 = MBCS_FROM_UCHAR32_ISO2022( 3061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[CNS_11643], 3062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, 3063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 3064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, 3065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MBCS_OUTPUT_3); 3066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 == 3 || (len2 == -3 && len == 0)) { 3067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = value; 3068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80); 3069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 >= 0) { 3070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 2; 3071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = -2; 3073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 3074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
3075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs == CNS_11643_1) { 3076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 1; 3077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(cs == CNS_11643_2) { 3078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 2; 3079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* plane 3..7 */ if(converterData->version == 1) { 3080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 3; 3081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-CN (without -EXT) does not support plane 3..7 */ 3083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 0; 3084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* GB2312_1 or ISO-IR-165 */ 3088103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS); 3089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len2 = MBCS_FROM_UCHAR32_ISO2022( 3090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru converterData->myConverterArray[cs0], 3091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceChar, 3092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 3093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, 3094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru MBCS_OUTPUT_2); 3095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len2 == 2 || (len2 == -2 && len == 0)) { 3096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetValue = value; 3097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = len2; 3098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru cs = cs0; 3099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru g = 1; 3100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback = FALSE; 3101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len != 0) { 3107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 0; /* count output bytes; it must have been abs(len) == 2 */ 3108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the designation sequence if necessary */ 3110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs != pFromU2022State->cs[g]) { 3111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs < CNS_11643) { 3112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer, escSeqCharsCN[cs], 4); 3113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3114103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(cs >= CNS_11643_1); 3115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4); 3116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = 4; 3118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[g] = cs; 3119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(g == 1) { 3120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* changing the SO/G1 charset invalidates the choices[] */ 3121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru choiceCount = 0; 
3122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the shift sequence if necessary */ 3126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(g != pFromU2022State->g) { 3127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(g) { 3128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 3129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = UCNV_SO; 3130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */ 3132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 1; 3133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 3135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 0x1b; 3136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 0x4e; 3137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: /* case 3 */ 3139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 0x1b; 3140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 0x4f; 3141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the two output bytes */ 3146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = 
(char)(targetValue >> 8); 3147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[len++] = (char)targetValue; 3148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* if we cannot find the character after checking all codepages 3150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * then this is an error 3151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INVALID_CHAR_FOUND; 3153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=sourceChar; 3154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output len>0 bytes in buffer[] */ 3159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(len == 1) { 3160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[0]; 3161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) { 3162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */ 3163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(len == 2 && (target + 2) <= targetLimit) { 3165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[0]; 3166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++ = buffer[1]; 3167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets) { 3168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)); 
3169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = sourceIndex; 3170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ = sourceIndex; 3171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 3174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 3175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, len, 3176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 3177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)), 3178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 3179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)) { 3180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } /* end if(myTargetIndex<myTargetLength) */ 3184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 3185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 3186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }/* end while(mySourceIndex<mySourceLength) */ 3190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the end of the input stream and detection of truncated input 
3193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are handled by the framework, but for ISO-2022-CN conversion 3194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to be in ASCII mode at the very end 3195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 3196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * conditions: 3197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * successful 3198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * not in ASCII mode 3199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of input and no truncated input 3200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U_SUCCESS(*err) && 3202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g!=0 && 3203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->flush && source>=sourceLimit && cnv->fromUChar32==0 3204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 3205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceIndex; 3206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we are switching to ASCII */ 3208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g=0; 3209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the source index of the last input character */ 3211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TODO this would be simpler and more reliable if we used a pair 3213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of sourceIndex/prevSourceIndex like in ucnvmbcs.c 3214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so that we could simply use the prevSourceIndex 
here; 3215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this code gives an incorrect result for the rare case of an unmatched 3216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trail surrogate that is alone in the last buffer of the text stream 3217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=(int32_t)(source-args->source); 3219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sourceIndex>0) { 3220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 3221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( U16_IS_TRAIL(args->source[sourceIndex]) && 3222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) 3223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 3224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --sourceIndex; 3225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=-1; 3228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUWriteUInt8( 3231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 3232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru SHIFT_IN_STR, 1, 3233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, (const char *)targetLimit, 3234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 3235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru err); 3236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru /*save the state and return */ 3239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = source; 3240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = (char*)target; 3241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 3245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, 3246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode* err){ 3247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char tempBuf[3]; 3248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySource = (char *) args->source; 3249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *myTarget = args->target; 3250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *mySourceLimit = args->sourceLimit; 3251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetUniChar = 0x0000; 3252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t mySourceChar = 0x0000; 3253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* myData; 3254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pToU2022State; 3255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myData=(UConverterDataISO2022*)(args->converter->extraInfo); 3257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State = &myData->toU2022State; 3258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myData->key != 0) { 3260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial 
escape sequence */ 3261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto escape; 3262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { 3263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with a partial double-byte character */ 3264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar = args->converter->toUBytes[0]; 3265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toULength = 0; 326685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = missingCharMarker; 3267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto getTrailByte; 3268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(mySource < mySourceLimit){ 3271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar =missingCharMarker; 3273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget < args->targetLimit){ 3275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySourceChar= (unsigned char) *mySource++; 3277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(mySourceChar){ 3279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UCNV_SI: 3280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=0; 328185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (myData->isEmptySegment) { 328285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious 
errors */ 328385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *err = U_ILLEGAL_ESCAPE_SEQUENCE; 328485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUCallbackReason = UCNV_IRREGULAR; 328585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUBytes[0] = mySourceChar; 328685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toULength = 1; 328785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->target = myTarget; 328885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->source = mySource; 328985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 329085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 3291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 3292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case UCNV_SO: 3294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pToU2022State->cs[1] != 0) { 3295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=1; 329685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ 3297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 3298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* illegal to have SO before a matching designator */ 330085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */ 3301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case ESC_2022: 3305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mySource--; 
3306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruescape: 330785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho { 330885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const char * mySourceBefore = mySource; 330985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int8_t toULengthBefore = args->converter->toULength; 331085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 331185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho changeState_2022(args->converter,&(mySource), 331285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceLimit, ISO_2022_CN,err); 331385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 331485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* After SO there must be at least one character before a designator (designator error handled separately) */ 331585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { 331685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *err = U_ILLEGAL_ESCAPE_SEQUENCE; 331785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho args->converter->toUCallbackReason = UCNV_IRREGULAR; 331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); 331985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 332085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 3321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* invalid or illegal escape sequence */ 3323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 3324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 3325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 332685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ 3327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 
3328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 3330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ 3332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case CR: 3334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*falls through*/ 3335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case LF: 3336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(pToU2022State, 0, sizeof(ISO2022State)); 3337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* falls through */ 3338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 3339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert one or two bytes */ 334085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho myData->isEmptySegment = FALSE; 3341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pToU2022State->g != 0) { 3342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySource < mySourceLimit) { 3343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterSharedData *cnv; 3344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru StateEnum tempState; 3345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t tempBufLen; 334685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int leadIsOk, trailIsOk; 334785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint8_t trailByte; 3348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetTrailByte: 334985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailByte = (uint8_t)*mySource; 335085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 335185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ticket 5691: consistent illegal sequences: 
335285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - We include at least the first byte in the illegal sequence. 335385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - If any of the non-initial bytes could be the start of a character, 335485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * we stop the illegal sequence before the first one of those. 335585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 335685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is 335785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * an ESC/SO/SI, we report only the first byte as the illegal sequence. 335885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Otherwise we convert or report the pair of bytes. 335985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 336085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); 336185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); 336285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (leadIsOk && trailIsOk) { 336385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 336485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempState = (StateEnum)pToU2022State->cs[pToU2022State->g]; 336585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(tempState >= CNS_11643_0) { 336685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv = myData->myConverterArray[CNS_11643]; 336785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); 336885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[1] = (char) (mySourceChar); 336985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[2] = (char) trailByte; 337085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBufLen = 3; 337185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 337285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho }else{ 
3373103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS); 337485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv = myData->myConverterArray[tempState]; 337585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[0] = (char) (mySourceChar); 337685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBuf[1] = (char) trailByte; 337785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tempBufLen = 2; 337885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 337985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); 338085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = (mySourceChar << 8) | trailByte; 338185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { 338285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* report a pair of illegal bytes if the second byte is not a DBCS starter */ 338385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++mySource; 338485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* add another bit so that the code below writes 2 bytes in case of error */ 338585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; 3386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pToU2022State->g>=2) { 3388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return from a single-shift state to the previous one */ 3389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToU2022State->g=pToU2022State->prevG; 3390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->toUBytes[0] = (uint8_t)mySourceChar; 3393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
args->converter->toULength = 1; 3394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto endloop; 3395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 3398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mySourceChar <= 0x7f) { 3399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar = (UChar) mySourceChar; 3400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ 3405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 3406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 
1 : 2)); 3407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++)=(UChar)targetUniChar; 3409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if(targetUniChar > missingCharMarker){ 3411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* disassemble the surrogate pair and write to output*/ 3412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetUniChar-=0x0010000; 3413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10)); 3414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 3415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); 3416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++myTarget; 3418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myTarget< args->targetLimit){ 3419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); 3420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(args->offsets){ 3421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 
1 : 2)); 3422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++myTarget; 3424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 3425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= 3426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); 3427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 3431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Call the callback function*/ 3432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); 3433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else{ 3437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err =U_BUFFER_OVERFLOW_ERROR; 3438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruendloop: 3442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 3443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 3444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 
3447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { 3448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 3449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; 3450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ISO2022State *pFromU2022State=&myConverterData->fromU2022State; 3451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *p, *subchar; 3452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buffer[8]; 3453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 3454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subchar=(char *)cnv->subChars; 3456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=cnv->subCharLen; /* assume length==1 for most variants */ 3457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p = buffer; 3459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(myConverterData->locale[0]){ 3460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'j': 3461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 3462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t cs; 3463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g == 1) { 3465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* JIS7: switch from G1 to G0 */ 3466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 3467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = UCNV_SI; 3468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
3469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cs = pFromU2022State->cs[0]; 3471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cs != ASCII && cs != JISX201) { 3472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not in ASCII or JIS X 0201: switch to ASCII */ 3473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->cs[0] = (int8_t)ASCII; 3474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = '\x1b'; 3475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = '\x28'; 3476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = '\x42'; 3477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[0]; 3480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'c': 3483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pFromU2022State->g != 0) { 3484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not in ASCII mode: switch to ASCII */ 3485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromU2022State->g = 0; 3486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = UCNV_SI; 3487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[0]; 3489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'k': 3491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->version == 0) { 3492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length == 1) { 
3493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((UBool)args->converter->fromUnicodeStatus) { 3494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* in DBCS mode: switch to SBCS */ 3495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUnicodeStatus = 0; 3496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = UCNV_SI; 3497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[0]; 3499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* length == 2*/ { 3500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!(UBool)args->converter->fromUnicodeStatus) { 3501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* in SBCS mode: switch to DBCS */ 3502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter->fromUnicodeStatus = 1; 3503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = UCNV_SO; 3504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[0]; 3506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++ = subchar[1]; 3507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save the subconverter's substitution string */ 3511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *currentSubChars = myConverterData->currentConverter->subChars; 3512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen; 3513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set our substitution string 
into the subconverter */ 3515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->subChars = (uint8_t *)subchar; 3516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->subCharLen = (int8_t)length; 3517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* let the subconverter write the subchar, set/retrieve fromUChar32 state */ 3519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter = myConverterData->currentConverter; 3520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32; 3521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_cbFromUWriteSub(args, 0, err); 3522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32; 3523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->converter = cnv; 3524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* restore the subconverter's substitution string */ 3526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->subChars = currentSubChars; 3527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->subCharLen = currentSubCharLen; 3528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*err == U_BUFFER_OVERFLOW_ERROR) { 3530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(myConverterData->currentConverter->charErrorBufferLength > 0) { 3531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy( 3532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer, 3533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
myConverterData->currentConverter->charErrorBuffer, 3534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBufferLength); 3535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength; 3537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myConverterData->currentConverter->charErrorBufferLength = 0; 3538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 3540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 3542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not expected */ 3543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_cbFromUWriteBytes(args, 3546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer, (int32_t)(p - buffer), 3547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsetIndex, err); 3548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 3551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Structure for cloning an ISO 2022 converter into a single memory block. 
3552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ucnv_safeClone() of the converter will align the entire cloneStruct, 3553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and then ucnv_safeClone() of the sub-converter may additionally align 3554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * currentConverter inside the cloneStruct, for which we need the deadSpace 3555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * after currentConverter. 3556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is because UAlignedMemory may be larger than the actually 3557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * necessary alignment size for the platform. 3558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The other cloneStruct fields will not be moved around, 3559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and are aligned properly with cloneStruct's alignment. 3560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct cloneStruct 3562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 3563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter cnv; 3564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter currentConverter; 3565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UAlignedMemory deadSpace; 3566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 mydata; 3567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UConverter * 3571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_SafeClone( 3572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UConverter *cnv, 
3573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void *stackBuffer, 3574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *pBufferSize, 3575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *status) 3576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 3577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru struct cloneStruct * localClone; 3578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022 *cnvData; 3579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, size; 3580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ 3582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pBufferSize = (int32_t)sizeof(struct cloneStruct); 3583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 3584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData = (UConverterDataISO2022 *)cnv->extraInfo; 3587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone = (struct cloneStruct *)stackBuffer; 3588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ 3590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022)); 3592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */ 3593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone->cnv.isExtraLocal = TRUE; 
3594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* share the subconverters */ 3596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnvData->currentConverter != NULL) { 3598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ 3599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru localClone->mydata.currentConverter = 3600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_safeClone(cnvData->currentConverter, 3601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &localClone->currentConverter, 3602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &size, status); 3603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*status)) { 3604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 3605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) { 3609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnvData->myConverterArray[i] != NULL) { 3610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_incrementRefCount(cnvData->myConverterArray[i]); 3611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return &localClone->cnv; 3615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
3617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 3618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_ISO_2022_GetUnicodeSet(const UConverter *cnv, 3619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 3620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterUnicodeSet which, 3621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) 3622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 3623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 3624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterDataISO2022* cnvData; 3625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*pErrorCode)) { 3627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 3628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 3630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cnv->sharedData == &_ISO2022Data) { 3631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* We use UTF-8 in this case */ 3632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0, 0xd7FF); 3633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0xE000, 0x10FFFF); 3634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 3635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 3637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData = (UConverterDataISO2022*)cnv->extraInfo; 3639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* open a set and initialize 
it with code points that are algorithmically round-tripped */ 3641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(cnvData->locale[0]){ 3642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'j': 3643c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* include JIS X 0201 which is hardcoded */ 3644c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru sa->add(sa->set, 0xa5); 3645c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru sa->add(sa->set, 0x203e); 3646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 3647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* include Latin-1 for some variants of JP */ 3648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0, 0xff); 3649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* include ASCII for JP */ 3651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0, 0x7f); 3652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3653c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { 3654c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 3655c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 3656c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) 3657c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * use half-width Katakana. 
3658c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) 3659c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * half-width Katakana via the ESC ( I sequence. 3660c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * However, we only emit (fromUnicode) half-width Katakana according to the 3661c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * definition of each variant. 3662c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * 3663c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * When including fallbacks, 3664c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * we need to include half-width Katakana Unicode code points for all JP variants because 3665c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). 3666c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 3667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* include half-width Katakana for JP */ 3668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, HWKANA_START, HWKANA_END); 3669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'c': 3672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'z': 3673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* include ASCII for CN */ 3674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addRange(sa->set, 0, 0x7f); 3675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 'k': 3677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* there is only one converter for KR, and it is not in the myConverterArray[] */ 
3678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData->currentConverter->sharedData->impl->getUnicodeSet( 3679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData->currentConverter, sa, which, pErrorCode); 3680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the loop over myConverterArray[] will simply not find another converter */ 3681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 3683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 3684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3686c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */ 3687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData->version==0 && i==CNS_11643 3689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 3690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */ 3691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_MBCSGetUnicodeSetForBytes( 3692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnvData->myConverterArray[i], 3693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa, UCNV_ROUNDTRIP_SET, 3694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0x81, 0x82, 3695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 3696c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 3697c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru#endif 3698c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 
3699c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 3700c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UConverterSetFilter filter; 3701c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(cnvData->myConverterArray[i]!=NULL) { 3702c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 3703c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru cnvData->version==0 && i==CNS_11643 3704c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ) { 3705c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 3706c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Version-specific for CN: 3707c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * CN version 0 does not map CNS planes 3..7 although 3708c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * they are all available in the CNS conversion table; 3709c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * CN version 1 (-EXT) does map them all. 3710c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * The two versions create different Unicode sets. 3711c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 3712c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter=UCNV_SET_FILTER_2022_CN; 3713c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if(cnvData->locale[0]=='j' && i==JISX208) { 3714c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 3715c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Only add code points that map to Shift-JIS codes 3716c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * corresponding to JIS X 0208. 
3717c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 3718c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter=UCNV_SET_FILTER_SJIS; 3719c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if(i==KSC5601) { 3720c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 3721c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables) 3722c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * are broader than GR94. 3723c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 3724c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter=UCNV_SET_FILTER_GR94DBCS; 3725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 3726c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter=UCNV_SET_FILTER_NONE; 3727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3728c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode); 3729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 3731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 3733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ISO 2022 converters must not convert SO/SI/ESC despite what 3734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sub-converters do by themselves. 3735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove these characters from the set. 
3736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 3737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->remove(sa->set, 0x0e); 3738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->remove(sa->set, 0x0f); 3739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->remove(sa->set, 0x1b); 3740c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 3741c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* ISO 2022 converters do not convert C1 controls either */ 3742c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru sa->removeRange(sa->set, 0x80, 0x9f); 3743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 3744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ISO2022Impl={ 3746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Open, 3752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close, 3753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Reset, 3754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_ENABLE_GENERIC_ISO_2022 3756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, 3757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, 3758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode_UTF8, 3759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, 
3760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 3761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 3766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022getName, 3770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_WriteSub, 3771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_SafeClone, 3772103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius _ISO_2022_GetUnicodeSet, 3773103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3774103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL, 3775103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL 3776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ISO2022StaticData={ 3778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 3779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "ISO_2022", 3780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2022, 3781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, 3782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ 3785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 
3786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _ISO2022Data={ 3794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), 3795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~((uint32_t) 0), 3796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022StaticData, 3799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022Impl, 3801103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 0, UCNV_MBCS_TABLE_INITIALIZER 3802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************JP****************/ 3805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ISO2022JPImpl={ 3806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
_ISO2022Open, 3812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close, 3813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Reset, 3814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, 3816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, 3817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, 3818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, 3819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022getName, 3823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_WriteSub, 3824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_SafeClone, 3825103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius _ISO_2022_GetUnicodeSet, 3826103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3827103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL, 3828103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL 3829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ISO2022JPStaticData={ 3831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 3832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "ISO_2022_JP", 3833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, 3835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 
3836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */ 3838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 3839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3846103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3847103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusnamespace { 3848103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3849103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst UConverterSharedData _ISO2022JPData={ 3850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), 3851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~((uint32_t) 0), 3852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022JPStaticData, 3855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022JPImpl, 3857103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 0, UCNV_MBCS_TABLE_INITIALIZER 3858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3860103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} // namespace 3861103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 
3862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/************* KR ***************/ 3863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ISO2022KRImpl={ 3864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Open, 3870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close, 3871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Reset, 3872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, 3874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, 3875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, 3876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, 3877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022getName, 3881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_WriteSub, 3882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_SafeClone, 3883103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius _ISO_2022_GetUnicodeSet, 3884103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3885103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL, 3886103e9ffba2cba345d0078eb8b8db33249f81840aCraig 
Cornelius NULL 3887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ISO2022KRStaticData={ 3889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 3890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "ISO_2022_KR", 3891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, 3893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, /* max 3 bytes per UChar: SO+DBCS */ 3896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 3897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3904103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3905103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusnamespace { 3906103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3907103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst UConverterSharedData _ISO2022KRData={ 3908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), 3909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~((uint32_t) 0), 3910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 
3912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022KRStaticData, 3913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022KRImpl, 3915103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 0, UCNV_MBCS_TABLE_INITIALIZER 3916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3918103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} // namespace 3919103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*************** CN ***************/ 3921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _ISO2022CNImpl={ 3922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Open, 3929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Close, 3930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022Reset, 3931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, 3933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, 3934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, 3935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, 
3936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO2022getName, 3940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_WriteSub, 3941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _ISO_2022_SafeClone, 3942103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius _ISO_2022_GetUnicodeSet, 3943103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3944103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL, 3945103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius NULL 3946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _ISO2022CNStaticData={ 3948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 3949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "ISO_2022_CN", 3950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, 3952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_ISO_2022, 3953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */ 3955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x1a, 0, 0, 0 }, 3956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 3961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 3962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3963103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3964103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusnamespace { 3965103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3966103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusconst UConverterSharedData _ISO2022CNData={ 3967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), 3968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ~((uint32_t) 0), 3969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 3971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022CNStaticData, 3972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, 3973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &_ISO2022CNImpl, 3974103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 0, UCNV_MBCS_TABLE_INITIALIZER 3975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 3976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3977103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius} // namespace 3978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 3980