/*
**********************************************************************
*   Copyright (C) 2002-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnv_u16.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002jul01
*   created by: Markus W. Scherer
*
*   UTF-16 converter implementation. Used to be in ucnv_utf.c.
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_NEED_TO_WRITE_BOM=1 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 3185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The UTF-16 toUnicode implementation is also used for the Java-specific 3285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * "with BOM" variants of UTF-16BE and UTF-16LE. 
3385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 3485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 3585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 3685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode); 3785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-16BE ----------------------------------------------------------------- */ 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if U_IS_BIG_ENDIAN 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *target; 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetCapacity, length, sourceIndex; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c, trail; 
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char overflow[4]; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)(pArgs->sourceLimit-source); 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0) { 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input, nothing to do */ 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the BOM if necessary */ 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const char bom[]={ (char)0xfe, (char)0xff }; 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bom, 2, 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->target, pArgs->targetLimit, 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->offsets, -1, 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=0; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target >= pArgs->targetLimit) { 
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-target); 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=0; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* c!=0 indicates in several places outside the main loops that a surrogate was found */ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the last buffer ended with a lead surrogate, output the surrogate pair */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --length; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)(trail>>8); 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)trail; 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=4; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*offsets++=-1; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=1; 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c=0; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy an even number of bytes for complete UChars */ 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t count=2*length; 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>targetCapacity) { 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=targetCapacity&~1; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* count is even */ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=count; 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count>>=1; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=count; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=2; 
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)(trail>>8); 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)trail; 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=2; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex++; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)(trail>>8); 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)trail; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=2; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==0) { 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with the loop for complete UChars */ 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0 && targetCapacity>0) { 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there is more input and some target capacity - 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it must be targetCapacity==1 because otherwise 
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the above would have copied more; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * prepare for overflow output 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c=*source++)) { 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[0]=(char)(c>>8); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[1]=(char)c; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; /* 2 bytes to output */ 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* } else { keep c for surrogate handling, length will be set there */ 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* keep c for surrogate handling, length will be set there */ 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity+=2*count; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; /* from here on, length counts the bytes in overflow[] */ 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c is a surrogate, and 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - source or target too short 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - or the surrogate is unmatched 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<pArgs->sourceLimit) { 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail=*source)) { 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair, will overflow (see conditions comment above) */ 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[0]=(char)(c>>8); 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[1]=(char)c; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[2]=(char)(trail>>8); 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[3]=(char)trail; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; /* 4 bytes to output */ 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if the trail surrogate is in the next buffer */ 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c; 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output length bytes with overflow (length>targetCapacity>0) */ 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow, length, 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (char **)&target, pArgs->targetLimit, 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) { 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=(char *)target; 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source; 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetCapacity, length, count, sourceIndex; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c, trail; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->converter->mode<8) { 26385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); 26485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 26585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 26685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0 && cnv->toUnicodeStatus==0) { 
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input, nothing to do */ 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target >= pArgs->targetLimit) { 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-target); 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=0; 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* complete a partial UChar or pair from the last call */ 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->toUnicodeStatus!=0) { 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * special case: single byte from a previous buffer, 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * where the byte turned out not to belong to a trail surrogate 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the preceding, unmatched lead surrogate was put into toUBytes[] 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for error handling 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0; 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((count=cnv->toULength)!=0) { 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p=cnv->toUBytes; 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p[count++]=*source++; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++sourceIndex; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --length; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==2) { 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)p[0]<<8)|p[1]; 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the BMP code point */ 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=0; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c)) { 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue collecting bytes for the trail surrogate */ 
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; /* avoid unnecessary surrogate handling below */ 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fall through to error handling for an unmatched trail surrogate */ 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(count==4) { 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)p[0]<<8)|p[1]; 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=((UChar)p[2]<<8)|p[3]; 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair */ 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>=2) { 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* targetCapacity==1 */ { 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=trail; 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=0; 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate, handle here for consistent toUBytes[] */ 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* back out reading the code unit after it */ 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((const uint8_t *)pArgs->source-source)>=2) { 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source-=2; 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if the trail unit's first byte was in a previous buffer, then 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to put it into a special place because toUBytes[] will be 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * used for the lead unit's bytes 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0x100|p[2]; 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --source; 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(length>0); 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=(int8_t)count; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy an even number of bytes for complete UChars */ 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=2*targetCapacity; 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>length) { 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=length&~1; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0 && count>0) { 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=count; 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count>>=1; 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=count; 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)source[0]<<8)|source[1]; 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(--count>0); 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)source[0]<<8)|source[1]; 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=2; 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru *target++=c; 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=4; 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(--count>0); 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==0) { 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with the loop for complete UChars */ 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* keep c for surrogate handling, trail will be set there */ 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length+=2*(count-1); /* one more byte pair was consumed than count decremented */ 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity+=count; 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c is a surrogate, and 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - source or target too short 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru * - or the surrogate is unmatched 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=(uint8_t)(c>>8); 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=(uint8_t)c; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>=2) { 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) { 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair, will overflow (see conditions comment above) */ 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=2; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=trail; 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if the trail surrogate is in the next buffer */ 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode)) { 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check for a remaining source byte */ 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity==0) { 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* it must be length==1 because otherwise the above would have copied more */ 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[cnv->toULength++]=*source++; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
pArgs->target=target; 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s, *sourceLimit; 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->converter->mode<8) { 49585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return UCNV_GET_NEXT_UCHAR_USE_TO_U; 49685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 49785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const uint8_t *)pArgs->source; 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s>=sourceLimit) { 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input */ 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_INDEX_OUTOFBOUNDS_ERROR; 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s+2>sourceLimit) { 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only one byte: truncated UChar */ 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toUBytes[0]=*s++; 
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=1; 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)s; 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_TRUNCATED_CHAR_FOUND; 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get one UChar */ 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar32)*s<<8)|s[1]; 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s+=2; 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check for a surrogate pair */ 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_IS_SURROGATE(c)) { 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s+2<=sourceLimit) { 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail; 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get a second UChar and see if it is a trail surrogate */ 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=((UChar)*s<<8)|s[1]; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=U16_GET_SUPPLEMENTARY(c, trail); 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s+=2; 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-2; 
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes=pArgs->converter->toUBytes; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s-=2; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=(int8_t)(sourceLimit-s); 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *bytes++=*s++; 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(s<sourceLimit); 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xffff; 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_TRUNCATED_CHAR_FOUND; 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-2; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0) { 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the unmatched surrogate */ 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes=pArgs->converter->toUBytes; 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=2; 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *bytes=*(s-2); 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[1]=*(s-1); 
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xffff; 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)s; 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 56985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) { 57085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(choice<=UCNV_RESET_TO_UNICODE) { 57185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset toUnicode state */ 57285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 57385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=8; /* no BOM handling */ 57485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 57585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */ 57685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 57785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 57885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { 57985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */ 58085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 58185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 58285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 
58385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 58485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 58585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16BEOpen(UConverter *cnv, 58685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs *pArgs, 58785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode) { 58885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)<=1) { 58985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16BEReset(cnv, UCNV_RESET_BOTH); 59085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 59185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 59285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 59385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 59485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 59585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const char * 59685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16BEGetName(const UConverter *cnv) { 59785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 59885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16BE"; 59985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 60085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16BE,version=1"; 60185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 60285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 60385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _UTF16BEImpl={ 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_UTF16_BigEndian, 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16BEOpen, 
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 61285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16BEReset, 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEToUnicodeWithOffsets, 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEToUnicodeWithOffsets, 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEFromUnicodeWithOffsets, 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEFromUnicodeWithOffsets, 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEGetNextUChar, 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 62185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16BEGetName, 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getNonSurrogateUnicodeSet 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _UTF16BEStaticData={ 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "UTF-16BE", 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE, 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru}; 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _UTF16BEData={ 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t) 0), 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_UTF16BEStaticData, FALSE, &_UTF16BEImpl, 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-16LE ----------------------------------------------------------------- */ 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source; 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *target; 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetCapacity, length, sourceIndex; 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c, trail; 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char overflow[4]; 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
length=(int32_t)(pArgs->sourceLimit-source); 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0) { 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input, nothing to do */ 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the BOM if necessary */ 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const char bom[]={ (char)0xff, (char)0xfe }; 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bom, 2, 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->target, pArgs->targetLimit, 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->offsets, -1, 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=0; 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target >= pArgs->targetLimit) { 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=0; 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* c!=0 indicates in several places outside the main loops that a surrogate was found */ 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the last buffer ended with a lead surrogate, output the surrogate pair */ 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --length; 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)trail; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)(trail>>8); 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=4; 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=1; 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c=0; 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy an even number of bytes for complete UChars */ 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t count=2*length; 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>targetCapacity) { 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=targetCapacity&~1; 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* count is even */ 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=count; 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count>>=1; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=count; 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=2; 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)trail; 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)(trail>>8); 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=2; 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex++; 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
target[1]=(uint8_t)(c>>8); 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)trail; 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)(trail>>8); 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=2; 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==0) { 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with the loop for complete UChars */ 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0 && targetCapacity>0) { 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there is more input and some target capacity - 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it must be targetCapacity==1 because otherwise 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the above would have copied more; 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * prepare for overflow output 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 
779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c=*source++)) { 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[0]=(char)c; 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[1]=(char)(c>>8); 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; /* 2 bytes to output */ 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* } else { keep c for surrogate handling, length will be set there */ 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* keep c for surrogate handling, length will be set there */ 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity+=2*count; 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; /* from here on, length counts the bytes in overflow[] */ 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c is a surrogate, and 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - source or target too short 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - or the surrogate is unmatched 
803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<pArgs->sourceLimit) { 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail=*source)) { 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair, will overflow (see conditions comment above) */ 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[0]=(char)c; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[1]=(char)(c>>8); 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[2]=(char)trail; 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[3]=(char)(trail>>8); 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; /* 4 bytes to output */ 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if the trail surrogate is in the next buffer */ 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c; 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output length bytes with overflow (length>targetCapacity>0) */ 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow, length, 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, pArgs->targetLimit, 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) { 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source; 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetCapacity, length, count, sourceIndex; 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c, trail; 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 86185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->converter->mode<8) { 86285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); 86385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 86485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 86585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0 && cnv->toUnicodeStatus==0) { 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input, nothing to do */ 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target >= pArgs->targetLimit) { 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=0; 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* complete a partial UChar or pair from the last call */ 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->toUnicodeStatus!=0) { 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * special case: single byte from a previous buffer, 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * where the byte turned out not to belong to a trail surrogate 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the preceding, unmatched lead surrogate was put into toUBytes[] 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for error handling 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0; 
896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((count=cnv->toULength)!=0) { 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p=cnv->toUBytes; 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p[count++]=*source++; 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++sourceIndex; 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --length; 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==2) { 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)p[1]<<8)|p[0]; 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the BMP code point */ 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=0; 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c)) { 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue collecting bytes for the trail surrogate */ 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; /* avoid unnecessary surrogate handling below */ 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fall through to error handling for an unmatched trail 
surrogate */ 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(count==4) { 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)p[1]<<8)|p[0]; 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=((UChar)p[3]<<8)|p[2]; 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair */ 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>=2) { 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* targetCapacity==1 */ { 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=trail; 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=0; 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } else { 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate, handle here for consistent toUBytes[] */ 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* back out reading the code unit after it */ 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((const uint8_t *)pArgs->source-source)>=2) { 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source-=2; 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if the trail unit's first byte was in a previous buffer, then 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to put it into a special place because toUBytes[] will be 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * used for the lead unit's bytes 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0x100|p[2]; 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --source; 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 
967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(length>0); 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=(int8_t)count; 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy an even number of bytes for complete UChars */ 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=2*targetCapacity; 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>length) { 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=length&~1; 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0 && count>0) { 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=count; 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count>>=1; 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=count; 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)source[1]<<8)|source[0]; 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru source+=2; 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(--count>0); 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)source[1]<<8)|source[0]; 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=2; 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru sourceIndex+=4; 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(--count>0); 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==0) { 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with the loop for complete UChars */ 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* keep c for surrogate handling, trail will be set there */ 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length+=2*(count-1); /* one more byte pair was consumed than count decremented */ 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity+=count; 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c is a surrogate, and 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - source or target too short 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - or the surrogate is unmatched 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=(uint8_t)c; 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=(uint8_t)(c>>8); 
1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>=2) { 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) { 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair, will overflow (see conditions comment above) */ 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=2; 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=trail; 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if the trail surrogate is in the next buffer */ 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode)) { 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check for a remaining source byte */ 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity==0) { 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* it must be length==1 because otherwise the above would have copied more */ 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[cnv->toULength++]=*source++; 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s, *sourceLimit; 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->converter->mode<8) { 109485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return UCNV_GET_NEXT_UCHAR_USE_TO_U; 109585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 109685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const uint8_t *)pArgs->source; 1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s>=sourceLimit) { 1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input */ 1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_INDEX_OUTOFBOUNDS_ERROR; 1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s+2>sourceLimit) { 1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only one byte: truncated UChar */ 1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toUBytes[0]=*s++; 1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=1; 1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)s; 1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
*err = U_TRUNCATED_CHAR_FOUND; 1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get one UChar */ 1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar32)s[1]<<8)|*s; 1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s+=2; 1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check for a surrogate pair */ 1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_IS_SURROGATE(c)) { 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s+2<=sourceLimit) { 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail; 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get a second UChar and see if it is a trail surrogate */ 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=((UChar)s[1]<<8)|*s; 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=U16_GET_SUPPLEMENTARY(c, trail); 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s+=2; 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-2; 1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* too few (2 or 3) bytes for a surrogate pair: 
truncated code point */ 1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes=pArgs->converter->toUBytes; 1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s-=2; 1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=(int8_t)(sourceLimit-s); 1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *bytes++=*s++; 1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(s<sourceLimit); 1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xffff; 1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_TRUNCATED_CHAR_FOUND; 1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-2; 1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0) { 1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the unmatched surrogate */ 1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes=pArgs->converter->toUBytes; 1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=2; 1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *bytes=*(s-2); 1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[1]=*(s-1); 1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xffff; 1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 
1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)s; 1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 116785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 116885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) { 116985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(choice<=UCNV_RESET_TO_UNICODE) { 117085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset toUnicode state */ 117185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 117285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=8; /* no BOM handling */ 117385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 117485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */ 117585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 117685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 117785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { 117885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */ 117985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 118085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 118185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 118285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 118385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 118485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16LEOpen(UConverter *cnv, 
118585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs *pArgs, 118685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode) { 118785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)<=1) { 118885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16LEReset(cnv, UCNV_RESET_BOTH); 118985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 119085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 119185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 119285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 119385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 119485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const char * 119585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16LEGetName(const UConverter *cnv) { 119685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 119785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16LE"; 119885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 119985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16LE,version=1"; 120085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 120185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 120285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _UTF16LEImpl={ 1204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_UTF16_LittleEndian, 1205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 120985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16LEOpen, 1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 121185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16LEReset, 
1212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEToUnicodeWithOffsets, 1214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEToUnicodeWithOffsets, 1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEFromUnicodeWithOffsets, 1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEFromUnicodeWithOffsets, 1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEGetNextUChar, 1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 122085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16LEGetName, 1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getNonSurrogateUnicodeSet 1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _UTF16LEStaticData={ 1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "UTF-16LE", 1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, 1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, 1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 
1236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _UTF16LEData={ 1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t) 0), 1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_UTF16LEStaticData, FALSE, &_UTF16LEImpl, 1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-16 (Detect BOM) ------------------------------------------------------ */ 1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE 1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * accordingly. 124985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * This is a simpler version of the UTF-32 converter, with 1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * fewer states for shorter BOMs. 
1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * State values: 1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0 initial state 125485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 1 saw first byte 125585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 2..5 - 125685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1 1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 8 UTF-16BE mode 1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 9 UTF-16LE mode 1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 126085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * During detection: state==number of initial bytes seen so far. 1261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * On output, emit U+FEFF as the first code point. 126385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 126485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Variants: 126585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error. 126685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and 126785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error. 
1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) { 1272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 1273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset toUnicode: state=0 */ 1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=0; 1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset fromUnicode: prepare to output the UTF-16PE BOM */ 1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 128227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const UConverterSharedData _UTF16v2Data; 128327f654740f2a26ad62a5c155af9199af9e69b889claireho 1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16Open(UConverter *cnv, 128685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs *pArgs, 1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 128827f654740f2a26ad62a5c155af9199af9e69b889claireho if(UCNV_GET_VERSION(cnv)<=2) { 128927f654740f2a26ad62a5c155af9199af9e69b889claireho if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) { 129027f654740f2a26ad62a5c155af9199af9e69b889claireho /* 129127f654740f2a26ad62a5c155af9199af9e69b889claireho * Switch 
implementation, and switch the staticData that's different 129227f654740f2a26ad62a5c155af9199af9e69b889claireho * and was copied into the UConverter. 129327f654740f2a26ad62a5c155af9199af9e69b889claireho * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.) 129427f654740f2a26ad62a5c155af9199af9e69b889claireho * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream. 129527f654740f2a26ad62a5c155af9199af9e69b889claireho */ 129627f654740f2a26ad62a5c155af9199af9e69b889claireho cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data; 129727f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN); 129827f654740f2a26ad62a5c155af9199af9e69b889claireho } 129985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16Reset(cnv, UCNV_RESET_BOTH); 130085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 130185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 130285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 1303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const char * 130685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16GetName(const UConverter *cnv) { 130785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 130885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16"; 130927f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(UCNV_GET_VERSION(cnv)==1) { 131085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16,version=1"; 131127f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 131227f654740f2a26ad62a5c155af9199af9e69b889claireho return "UTF-16,version=2"; 131385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 131485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 131585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 
131685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoconst UConverterSharedData _UTF16Data; 131785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 131885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData) 131985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData) 132027f654740f2a26ad62a5c155af9199af9e69b889claireho#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data || (cnv)->sharedData==&_UTF16v2Data) 1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv=pArgs->converter; 1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *source=pArgs->source; 1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *sourceLimit=pArgs->sourceLimit; 1328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets=pArgs->offsets; 1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t state, offsetDelta; 133185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint8_t b; 1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=cnv->mode; 1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If we detect a BOM in this buffer, then we must add the BOM size to the 1337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets because the actual converter function 
will not see and count the BOM. 1338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsetDelta will have the number of the BOM bytes that are in the current buffer. 1339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsetDelta=0; 1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { 1343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 1344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0: 134585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toUBytes[0]=(uint8_t)*source++; 134685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toULength=1; 134785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=1; 1348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 135085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 135185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Only inside this switch case can the state variable 135285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * temporarily take two additional values: 135385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 6: BOM error, continue with BE 135485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 7: BOM error, continue with LE 135585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 135685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho b=*source; 135785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(cnv->toUBytes[0]==0xfe && b==0xff) { 135885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(IS_UTF16LE(cnv)) { 135985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=7; /* illegal reverse BOM for Java "UnicodeLittle" */ 136085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 1361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=8; /* 
detect UTF-16BE */ 136285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 136385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(cnv->toUBytes[0]==0xff && b==0xfe) { 136485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(IS_UTF16BE(cnv)) { 136585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=6; /* illegal reverse BOM for Java "UnicodeBig" */ 136685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 1367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=9; /* detect UTF-16LE */ 1368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 136985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) { 137085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=6; /* illegal missing BOM for Java "Unicode" */ 137185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 137285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(state>=8) { 137385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* BOM detected, consume it */ 137485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++source; 137585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toULength=0; 137685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho offsetDelta=(int32_t)(source-pArgs->source); 137785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(state<6) { 137885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ok: no BOM, and not a reverse BOM */ 1379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source!=pArgs->source) { 138085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset the source for a correct first offset */ 1381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 138285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toULength=0; 138385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 138485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(IS_UTF16LE(cnv)) { 138585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Make Java "UnicodeLittle" default to LE. 
*/ 138685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=9; 1387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 138885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */ 138985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=8; 1390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 139185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 139285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 139385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * error: missing BOM, or reverse BOM 139485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * UTF-16,version=1: Java-specific "Unicode" requires a BOM. 139585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM. 139685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM. 139785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 139885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* report the non-BOM or reverse BOM as an illegal sequence */ 139985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toUBytes[1]=b; 140085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toULength=2; 140185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho pArgs->source=source+1; 140285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* continue with conversion if the callback resets the error */ 140385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 140485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Make Java "Unicode" default to BE like standard UTF-16. 140585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Make Java "UnicodeBig" and "UnicodeLittle" default 140685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * to their normal endiannesses. 
140785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 140885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=state+2; 140985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; 141085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 1411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 141285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* convert the rest of the stream */ 141385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=state; 141485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho continue; 1415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 8: 1416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* call UTF-16BE */ 1417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); 1419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 1420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 9: 1422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* call UTF-16LE */ 1423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); 1425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 1426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* does not occur */ 1429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add BOM size to offsets - see comment at offsetDelta declaration */ 1433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL && offsetDelta!=0) { 1434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsetsLimit=pArgs->offsets; 1435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(offsets<offsetsLimit) { 1436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ += offsetDelta; 1437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source==sourceLimit && pArgs->flush) { 1443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* handle truncated input */ 1444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 1445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0: 1446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* no input at all, nothing to do */ 1447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 8: 1448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); 1449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 9: 1451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); 1452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 145485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 0<state<8: framework will report truncation, 
nothing to do here */ 1455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=state; 1460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 1463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs, 1464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(pArgs->converter->mode) { 1466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 8: 1467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return _UTF16BEGetNextUChar(pArgs, pErrorCode); 1468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 9: 1469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return _UTF16LEGetNextUChar(pArgs, pErrorCode); 1470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_GET_NEXT_UCHAR_USE_TO_U; 1472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _UTF16Impl = { 1476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_UTF16, 1477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 
1480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16Open, 1482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16Reset, 1484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16ToUnicodeWithOffsets, 1486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16ToUnicodeWithOffsets, 1487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16PEFromUnicodeWithOffsets, 1488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16PEFromUnicodeWithOffsets, 1489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16GetNextUChar, 1490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ 149285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16GetName, 1493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getNonSurrogateUnicodeSet 1496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _UTF16StaticData = { 1499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 1500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "UTF-16", 1501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1204, /* CCSID for BOM sensitive UTF-16 */ 1502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, UCNV_UTF16, 2, 2, 1503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if U_IS_BIG_ENDIAN 
1504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xff, 0xfd, 0, 0 }, 2, 1505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 1506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xfd, 0xff, 0, 0 }, 2, 1507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, FALSE, 1509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _UTF16Data = { 1515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t) 0), 1516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_UTF16StaticData, FALSE, &_UTF16Impl, 1517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 1518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152027f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const UConverterImpl _UTF16v2Impl = { 152127f654740f2a26ad62a5c155af9199af9e69b889claireho UCNV_UTF16, 152227f654740f2a26ad62a5c155af9199af9e69b889claireho 152327f654740f2a26ad62a5c155af9199af9e69b889claireho NULL, 152427f654740f2a26ad62a5c155af9199af9e69b889claireho NULL, 152527f654740f2a26ad62a5c155af9199af9e69b889claireho 152627f654740f2a26ad62a5c155af9199af9e69b889claireho _UTF16Open, 152727f654740f2a26ad62a5c155af9199af9e69b889claireho NULL, 152827f654740f2a26ad62a5c155af9199af9e69b889claireho _UTF16Reset, 152927f654740f2a26ad62a5c155af9199af9e69b889claireho 153027f654740f2a26ad62a5c155af9199af9e69b889claireho 
_UTF16ToUnicodeWithOffsets, 153127f654740f2a26ad62a5c155af9199af9e69b889claireho _UTF16ToUnicodeWithOffsets, 153227f654740f2a26ad62a5c155af9199af9e69b889claireho _UTF16BEFromUnicodeWithOffsets, 153327f654740f2a26ad62a5c155af9199af9e69b889claireho _UTF16BEFromUnicodeWithOffsets, 153427f654740f2a26ad62a5c155af9199af9e69b889claireho _UTF16GetNextUChar, 153527f654740f2a26ad62a5c155af9199af9e69b889claireho 153627f654740f2a26ad62a5c155af9199af9e69b889claireho NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ 153727f654740f2a26ad62a5c155af9199af9e69b889claireho _UTF16GetName, 153827f654740f2a26ad62a5c155af9199af9e69b889claireho NULL, 153927f654740f2a26ad62a5c155af9199af9e69b889claireho NULL, 154027f654740f2a26ad62a5c155af9199af9e69b889claireho ucnv_getNonSurrogateUnicodeSet 154127f654740f2a26ad62a5c155af9199af9e69b889claireho}; 154227f654740f2a26ad62a5c155af9199af9e69b889claireho 154327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const UConverterStaticData _UTF16v2StaticData = { 154427f654740f2a26ad62a5c155af9199af9e69b889claireho sizeof(UConverterStaticData), 154527f654740f2a26ad62a5c155af9199af9e69b889claireho "UTF-16,version=2", 154627f654740f2a26ad62a5c155af9199af9e69b889claireho 1204, /* CCSID for BOM sensitive UTF-16 */ 154727f654740f2a26ad62a5c155af9199af9e69b889claireho UCNV_IBM, UCNV_UTF16, 2, 2, 154827f654740f2a26ad62a5c155af9199af9e69b889claireho { 0xff, 0xfd, 0, 0 }, 2, 154927f654740f2a26ad62a5c155af9199af9e69b889claireho FALSE, FALSE, 155027f654740f2a26ad62a5c155af9199af9e69b889claireho 0, 155127f654740f2a26ad62a5c155af9199af9e69b889claireho 0, 155227f654740f2a26ad62a5c155af9199af9e69b889claireho { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 155327f654740f2a26ad62a5c155af9199af9e69b889claireho}; 155427f654740f2a26ad62a5c155af9199af9e69b889claireho 155527f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const UConverterSharedData _UTF16v2Data = { 155627f654740f2a26ad62a5c155af9199af9e69b889claireho 
sizeof(UConverterSharedData), ~((uint32_t) 0), 155727f654740f2a26ad62a5c155af9199af9e69b889claireho NULL, NULL, &_UTF16v2StaticData, FALSE, &_UTF16v2Impl, 155827f654740f2a26ad62a5c155af9199af9e69b889claireho 0 155927f654740f2a26ad62a5c155af9199af9e69b889claireho}; 156027f654740f2a26ad62a5c155af9199af9e69b889claireho 1561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1562