ucnv_u16.c revision 85bf2e2fbc60a9f938064abc8127d61da7d19882
1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Copyright (C) 2002-2009, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: ucnv_u16.c 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2002jul01 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* UTF-16 converter implementation. Used to be in ucnv_utf.c. 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruenum { 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_NEED_TO_WRITE_BOM=1 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 3185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The UTF-16 toUnicode implementation is also used for the Java-specific 3285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * "with BOM" variants of UTF-16BE and UTF-16LE. 3385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 3485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 3585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 3685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode); 3785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-16BE ----------------------------------------------------------------- */ 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if U_IS_BIG_ENDIAN 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *target; 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetCapacity, length, sourceIndex; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c, trail; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char overflow[4]; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)(pArgs->sourceLimit-source); 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0) { 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input, nothing to do */ 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the BOM if necessary */ 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const char bom[]={ (char)0xfe, (char)0xff }; 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bom, 2, 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->target, pArgs->targetLimit, 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->offsets, -1, 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=0; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target >= pArgs->targetLimit) { 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-target); 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=0; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* c!=0 indicates in several places outside the main loops that a surrogate was found */ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the last buffer ended with a lead surrogate, output the surrogate pair */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --length; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)(trail>>8); 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)trail; 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=4; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=1; 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c=0; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy an even number of bytes for complete UChars */ 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t count=2*length; 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>targetCapacity) { 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=targetCapacity&~1; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* count is even */ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=count; 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count>>=1; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=count; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=2; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)(trail>>8); 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)trail; 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=2; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex++; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)(c>>8); 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)c; 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)(trail>>8); 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)trail; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=2; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==0) { 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with the loop for complete UChars */ 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0 && targetCapacity>0) { 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there is more input and some target capacity - 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it must be targetCapacity==1 because otherwise 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the above would have copied more; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * prepare for overflow output 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c=*source++)) { 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[0]=(char)(c>>8); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[1]=(char)c; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; /* 2 bytes to output */ 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* } else { keep c for surrogate handling, length will be set there */ 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* keep c for surrogate handling, length will be set there */ 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity+=2*count; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; /* from here on, length counts the bytes in overflow[] */ 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c is a surrogate, and 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - source or target too short 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - or the surrogate is unmatched 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<pArgs->sourceLimit) { 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail=*source)) { 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair, will overflow (see conditions comment above) */ 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[0]=(char)(c>>8); 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[1]=(char)c; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[2]=(char)(trail>>8); 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[3]=(char)trail; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; /* 4 bytes to output */ 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if the trail surrogate is in the next buffer */ 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c; 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output length bytes with overflow (length>targetCapacity>0) */ 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow, length, 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (char **)&target, pArgs->targetLimit, 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) { 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=(char *)target; 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source; 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetCapacity, length, count, sourceIndex; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c, trail; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->converter->mode<8) { 26385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); 26485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 26585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 26685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0 && cnv->toUnicodeStatus==0) { 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input, nothing to do */ 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target >= pArgs->targetLimit) { 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-target); 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=0; 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* complete a partial UChar or pair from the last call */ 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->toUnicodeStatus!=0) { 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * special case: single byte from a previous buffer, 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * where the byte turned out not to belong to a trail surrogate 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the preceding, unmatched lead surrogate was put into toUBytes[] 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for error handling 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0; 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((count=cnv->toULength)!=0) { 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p=cnv->toUBytes; 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p[count++]=*source++; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++sourceIndex; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --length; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==2) { 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)p[0]<<8)|p[1]; 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the BMP code point */ 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=0; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c)) { 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue collecting bytes for the trail surrogate */ 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; /* avoid unnecessary surrogate handling below */ 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fall through to error handling for an unmatched trail surrogate */ 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(count==4) { 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)p[0]<<8)|p[1]; 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=((UChar)p[2]<<8)|p[3]; 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair */ 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>=2) { 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* targetCapacity==1 */ { 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=trail; 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=0; 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate, handle here for consistent toUBytes[] */ 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* back out reading the code unit after it */ 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((const uint8_t *)pArgs->source-source)>=2) { 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source-=2; 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if the trail unit's first byte was in a previous buffer, then 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to put it into a special place because toUBytes[] will be 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * used for the lead unit's bytes 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0x100|p[2]; 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --source; 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(length>0); 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=(int8_t)count; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy an even number of bytes for complete UChars */ 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=2*targetCapacity; 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>length) { 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=length&~1; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0 && count>0) { 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=count; 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count>>=1; 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=count; 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)source[0]<<8)|source[1]; 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(--count>0); 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)source[0]<<8)|source[1]; 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=2; 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=4; 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(--count>0); 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==0) { 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with the loop for complete UChars */ 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* keep c for surrogate handling, trail will be set there */ 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length+=2*(count-1); /* one more byte pair was consumed than count decremented */ 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity+=count; 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c is a surrogate, and 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - source or target too short 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - or the surrogate is unmatched 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=(uint8_t)(c>>8); 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=(uint8_t)c; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>=2) { 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) { 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair, will overflow (see conditions comment above) */ 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=2; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=trail; 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if the trail surrogate is in the next buffer */ 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode)) { 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check for a remaining source byte */ 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity==0) { 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* it must be length==1 because otherwise the above would have copied more */ 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[cnv->toULength++]=*source++; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s, *sourceLimit; 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->converter->mode<8) { 49585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return UCNV_GET_NEXT_UCHAR_USE_TO_U; 49685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 49785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const uint8_t *)pArgs->source; 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s>=sourceLimit) { 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input */ 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_INDEX_OUTOFBOUNDS_ERROR; 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s+2>sourceLimit) { 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only one byte: truncated UChar */ 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toUBytes[0]=*s++; 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=1; 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)s; 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_TRUNCATED_CHAR_FOUND; 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get one UChar */ 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar32)*s<<8)|s[1]; 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s+=2; 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check for a surrogate pair */ 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_IS_SURROGATE(c)) { 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s+2<=sourceLimit) { 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail; 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get a second UChar and see if it is a trail surrogate */ 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=((UChar)*s<<8)|s[1]; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=U16_GET_SUPPLEMENTARY(c, trail); 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s+=2; 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-2; 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes=pArgs->converter->toUBytes; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s-=2; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=(int8_t)(sourceLimit-s); 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *bytes++=*s++; 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(s<sourceLimit); 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xffff; 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_TRUNCATED_CHAR_FOUND; 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-2; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0) { 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the unmatched surrogate */ 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes=pArgs->converter->toUBytes; 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=2; 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *bytes=*(s-2); 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[1]=*(s-1); 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xffff; 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)s; 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 56985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) { 57085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(choice<=UCNV_RESET_TO_UNICODE) { 57185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset toUnicode state */ 57285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 57385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=8; /* no BOM handling */ 57485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 57585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */ 57685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 57785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 57885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { 57985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */ 58085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 58185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 58285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 58385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 58485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 58585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16BEOpen(UConverter *cnv, 58685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs *pArgs, 58785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode) { 58885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)<=1) { 58985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16BEReset(cnv, UCNV_RESET_BOTH); 59085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 59185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 59285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 59385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 59485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 59585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const char * 59685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16BEGetName(const UConverter *cnv) { 59785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 59885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16BE"; 59985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 60085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16BE,version=1"; 60185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 60285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 60385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _UTF16BEImpl={ 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_UTF16_BigEndian, 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16BEOpen, 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 61285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16BEReset, 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEToUnicodeWithOffsets, 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEToUnicodeWithOffsets, 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEFromUnicodeWithOffsets, 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEFromUnicodeWithOffsets, 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEGetNextUChar, 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 62185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16BEGetName, 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getNonSurrogateUnicodeSet 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _UTF16BEStaticData={ 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "UTF-16BE", 63085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* BEGIN Android-changed */ 63185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ICU ticket#7226 */ 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, 63385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* END Android-changed */ 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE, 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _UTF16BEData={ 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t) 0), 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_UTF16BEStaticData, FALSE, &_UTF16BEImpl, 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-16LE ----------------------------------------------------------------- */ 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *source; 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *target; 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetCapacity, length, sourceIndex; 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c, trail; 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char overflow[4]; 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)(pArgs->sourceLimit-source); 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0) { 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input, nothing to do */ 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the BOM if necessary */ 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static const char bom[]={ (char)0xff, (char)0xfe }; 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bom, 2, 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->target, pArgs->targetLimit, 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->offsets, -1, 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=0; 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target >= pArgs->targetLimit) { 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=0; 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* c!=0 indicates in several places outside the main loops that a surrogate was found */ 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the last buffer ended with a lead surrogate, output the surrogate pair */ 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --length; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)trail; 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)(trail>>8); 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=4; 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=1; 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c=0; 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0) { 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy an even number of bytes for complete UChars */ 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t count=2*length; 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>targetCapacity) { 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=targetCapacity&~1; 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* count is even */ 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=count; 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count>>=1; 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=count; 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=2; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)trail; 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)(trail>>8); 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=*source++; 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=2; 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex++; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0]=(uint8_t)c; 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[1]=(uint8_t)(c>>8); 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[2]=(uint8_t)trail; 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[3]=(uint8_t)(trail>>8); 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target+=4; 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=2; 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==0) { 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with the loop for complete UChars */ 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0 && targetCapacity>0) { 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there is more input and some target capacity - 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * it must be targetCapacity==1 because otherwise 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the above would have copied more; 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * prepare for overflow output 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c=*source++)) { 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[0]=(char)c; 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[1]=(char)(c>>8); 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=2; /* 2 bytes to output */ 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* } else { keep c for surrogate handling, length will be set there */ 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* keep c for surrogate handling, length will be set there */ 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity+=2*count; 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; /* from here on, length counts the bytes in overflow[] */ 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c is a surrogate, and 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - source or target too short 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - or the surrogate is unmatched 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<pArgs->sourceLimit) { 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail=*source)) { 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair, will overflow (see conditions comment above) */ 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[0]=(char)c; 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[1]=(char)(c>>8); 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[2]=(char)trail; 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow[3]=(char)(trail>>8); 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=4; /* 4 bytes to output */ 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if the trail surrogate is in the next buffer */ 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=c; 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output length bytes with overflow (length>targetCapacity>0) */ 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru overflow, length, 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &target, pArgs->targetLimit, 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &offsets, sourceIndex, 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) { 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source; 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *target; 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets; 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t targetCapacity, length, count, sourceIndex; 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c, trail; 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 86485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->converter->mode<8) { 86585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); 86685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 86785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 86885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pArgs->converter; 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(const uint8_t *)pArgs->source; 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0 && cnv->toUnicodeStatus==0) { 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input, nothing to do */ 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pArgs->target; 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target >= pArgs->targetLimit) { 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets=pArgs->offsets; 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex=0; 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* complete a partial UChar or pair from the last call */ 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cnv->toUnicodeStatus!=0) { 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * special case: single byte from a previous buffer, 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * where the byte turned out not to belong to a trail surrogate 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the preceding, unmatched lead surrogate was put into toUBytes[] 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for error handling 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=1; 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0; 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((count=cnv->toULength)!=0) { 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p=cnv->toUBytes; 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p[count++]=*source++; 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++sourceIndex; 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --length; 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==2) { 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)p[1]<<8)|p[0]; 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the BMP code point */ 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --targetCapacity; 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=0; 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c)) { 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue collecting bytes for the trail surrogate */ 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; /* avoid unnecessary surrogate handling below */ 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fall through to error handling for an unmatched trail surrogate */ 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(count==4) { 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)p[1]<<8)|p[0]; 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=((UChar)p[3]<<8)|p[2]; 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair */ 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity>=2) { 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=-1; 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=2; 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* targetCapacity==1 */ { 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=0; 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=trail; 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=0; 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate, handle here for consistent toUBytes[] */ 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* back out reading the code unit after it */ 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(((const uint8_t *)pArgs->source-source)>=2) { 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source-=2; 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if the trail unit's first byte was in a previous buffer, then 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we need to put it into a special place because toUBytes[] will be 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * used for the lead unit's bytes 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus=0x100|p[2]; 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --source; 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(length>0); 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=(int8_t)count; 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy an even number of bytes for complete UChars */ 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=2*targetCapacity; 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>length) { 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=length&~1; 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0 && count>0) { 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=count; 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count>>=1; 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity-=count; 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets==NULL) { 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)source[1]<<8)|source[0]; 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(--count>0); 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar)source[1]<<8)|source[0]; 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SINGLE(c)) { 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=2; 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=trail; 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceIndex+=4; 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(--count>0); 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count==0) { 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with the loop for complete UChars */ 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* keep c for surrogate handling, trail will be set there */ 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length+=2*(count-1); /* one more byte pair was consumed than count decremented */ 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity+=count; 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c is a surrogate, and 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - source or target too short 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - or the surrogate is unmatched 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0]=(uint8_t)c; 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[1]=(uint8_t)(c>>8); 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=2; 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>=2) { 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) { 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the surrogate pair, will overflow (see conditions comment above) */ 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length-=2; 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=c; 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL) { 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++=sourceIndex; 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0]=trail; 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength=1; 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=0; 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if the trail surrogate is in the next buffer */ 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode)) { 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check for a remaining source byte */ 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(targetCapacity==0) { 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* it must be length==1 because otherwise the above would have copied more */ 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[cnv->toULength++]=*source++; 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)source; 1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->target=target; 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->offsets=offsets; 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *s, *sourceLimit; 1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(pArgs->converter->mode<8) { 109785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return UCNV_GET_NEXT_UCHAR_USE_TO_U; 109885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 109985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const uint8_t *)pArgs->source; 1101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(const uint8_t *)pArgs->sourceLimit; 1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s>=sourceLimit) { 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input */ 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_INDEX_OUTOFBOUNDS_ERROR; 1106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s+2>sourceLimit) { 1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* only one byte: truncated UChar */ 1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toUBytes[0]=*s++; 1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=1; 1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)s; 1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_TRUNCATED_CHAR_FOUND; 1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get one UChar */ 1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=((UChar32)s[1]<<8)|*s; 1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s+=2; 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check for a surrogate pair */ 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_IS_SURROGATE(c)) { 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_SURROGATE_LEAD(c)) { 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s+2<=sourceLimit) { 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar trail; 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get a second UChar and see if it is a trail surrogate */ 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trail=((UChar)s[1]<<8)|*s; 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U16_IS_TRAIL(trail)) { 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=U16_GET_SUPPLEMENTARY(c, trail); 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s+=2; 1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched lead surrogate */ 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-2; 1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ 1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes=pArgs->converter->toUBytes; 1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s-=2; 1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=(int8_t)(sourceLimit-s); 1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *bytes++=*s++; 1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while(s<sourceLimit); 1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xffff; 1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_TRUNCATED_CHAR_FOUND; 1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unmatched trail surrogate */ 1151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=-2; 1152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c<0) { 1155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the unmatched surrogate */ 1156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes=pArgs->converter->toUBytes; 1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->converter->toULength=2; 1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *bytes=*(s-2); 1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes[1]=*(s-1); 1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xffff; 1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err=U_ILLEGAL_CHAR_FOUND; 1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=(const char *)s; 1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c; 1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 117085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 117185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) { 117285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(choice<=UCNV_RESET_TO_UNICODE) { 117385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset toUnicode state */ 117485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 117585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=8; /* no BOM handling */ 117685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 117785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */ 117885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 117985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 118085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { 118185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */ 118285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 118385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 118485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 118585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 118685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 118785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16LEOpen(UConverter *cnv, 118885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs *pArgs, 118985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode) { 119085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)<=1) { 119185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16LEReset(cnv, UCNV_RESET_BOTH); 119285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 119385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 119485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 119585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 119685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 119785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const char * 119885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16LEGetName(const UConverter *cnv) { 119985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 120085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16LE"; 120185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 120285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16LE,version=1"; 120385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 120485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 120585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _UTF16LEImpl={ 1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_UTF16_LittleEndian, 1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 121285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16LEOpen, 1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 121485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16LEReset, 1215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEToUnicodeWithOffsets, 1217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEToUnicodeWithOffsets, 1218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEFromUnicodeWithOffsets, 1219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEFromUnicodeWithOffsets, 1220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEGetNextUChar, 1221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 122385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16LEGetName, 1224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getNonSurrogateUnicodeSet 1227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _UTF16LEStaticData={ 1231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 1232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "UTF-16LE", 123385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* BEGIN Android-changed */ 123485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ICU ticket#7226 */ 1235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, 123685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* END Android-changed */ 1237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, 1238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _UTF16LEData={ 1245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t) 0), 1246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_UTF16LEStaticData, FALSE, &_UTF16LEImpl, 1247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 1248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-16 (Detect BOM) ------------------------------------------------------ */ 1251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE 1254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * accordingly. 125585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * This is a simpler version of the UTF-32 converter, with 1256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * fewer states for shorter BOMs. 1257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * State values: 1259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0 initial state 126085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 1 saw first byte 126185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 2..5 - 126285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1 1263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 8 UTF-16BE mode 1264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 9 UTF-16LE mode 1265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 126685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * During detection: state==number of initial bytes seen so far. 1267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * On output, emit U+FEFF as the first code point. 126985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 127085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Variants: 127185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error. 127285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and 127385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error. 1274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) { 1278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice<=UCNV_RESET_TO_UNICODE) { 1279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset toUnicode: state=0 */ 1280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=0; 1281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(choice!=UCNV_RESET_TO_UNICODE) { 1283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset fromUnicode: prepare to output the UTF-16PE BOM */ 1284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 1285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16Open(UConverter *cnv, 129085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UConverterLoadArgs *pArgs, 1291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 129285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)<=1) { 129385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16Reset(cnv, UCNV_RESET_BOTH); 129485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 129585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 129685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 1297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 129985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic const char * 130085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho_UTF16GetName(const UConverter *cnv) { 130185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(UCNV_GET_VERSION(cnv)==0) { 130285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16"; 130385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 130485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return "UTF-16,version=1"; 130585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 130685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 130785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 130885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoconst UConverterSharedData _UTF16Data; 130985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 131085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData) 131185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData) 131285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data) 1313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 1315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 1316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv=pArgs->converter; 1318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *source=pArgs->source; 1319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *sourceLimit=pArgs->sourceLimit; 1320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsets=pArgs->offsets; 1321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t state, offsetDelta; 132385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint8_t b; 1324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=cnv->mode; 1326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If we detect a BOM in this buffer, then we must add the BOM size to the 1329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsets because the actual converter function will not see and count the BOM. 1330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offsetDelta will have the number of the BOM bytes that are in the current buffer. 1331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsetDelta=0; 1333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { 1335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 1336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0: 133785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toUBytes[0]=(uint8_t)*source++; 133885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toULength=1; 133985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=1; 1340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 134285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 134385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Only inside this switch case can the state variable 134485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * temporarily take two additional values: 134585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 6: BOM error, continue with BE 134685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 7: BOM error, continue with LE 134785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 134885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho b=*source; 134985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(cnv->toUBytes[0]==0xfe && b==0xff) { 135085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(IS_UTF16LE(cnv)) { 135185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=7; /* illegal reverse BOM for Java "UnicodeLittle" */ 135285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 1353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=8; /* detect UTF-16BE */ 135485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 135585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(cnv->toUBytes[0]==0xff && b==0xfe) { 135685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(IS_UTF16BE(cnv)) { 135785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=6; /* illegal reverse BOM for Java "UnicodeBig" */ 135885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 1359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state=9; /* detect UTF-16LE */ 1360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 136185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) { 136285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=6; /* illegal missing BOM for Java "Unicode" */ 136385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 136485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(state>=8) { 136585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* BOM detected, consume it */ 136685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++source; 136785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toULength=0; 136885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho offsetDelta=(int32_t)(source-pArgs->source); 136985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(state<6) { 137085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ok: no BOM, and not a reverse BOM */ 1371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source!=pArgs->source) { 137285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reset the source for a correct first offset */ 1373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 137485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toULength=0; 137585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 137685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(IS_UTF16LE(cnv)) { 137785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Make Java "UnicodeLittle" default to LE. */ 137885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=9; 1379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 138085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */ 138185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho state=8; 1382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 138385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 138485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 138585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * error: missing BOM, or reverse BOM 138685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * UTF-16,version=1: Java-specific "Unicode" requires a BOM. 138785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM. 138885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM. 138985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 139085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* report the non-BOM or reverse BOM as an illegal sequence */ 139185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toUBytes[1]=b; 139285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->toULength=2; 139385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho pArgs->source=source+1; 139485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* continue with conversion if the callback resets the error */ 139585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 139685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Make Java "Unicode" default to BE like standard UTF-16. 139785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Make Java "UnicodeBig" and "UnicodeLittle" default 139885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * to their normal endiannesses. 139985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 140085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=state+2; 140185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; 140285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 1403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 140485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* convert the rest of the stream */ 140585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho cnv->mode=state; 140685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho continue; 1407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 8: 1408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* call UTF-16BE */ 1409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); 1411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 1412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 9: 1414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* call UTF-16LE */ 1415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); 1417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=pArgs->source; 1418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* does not occur */ 1421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add BOM size to offsets - see comment at offsetDelta declaration */ 1425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(offsets!=NULL && offsetDelta!=0) { 1426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *offsetsLimit=pArgs->offsets; 1427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(offsets<offsetsLimit) { 1428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *offsets++ += offsetDelta; 1429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=source; 1433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source==sourceLimit && pArgs->flush) { 1435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* handle truncated input */ 1436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(state) { 1437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0: 1438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* no input at all, nothing to do */ 1439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 8: 1440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); 1441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 9: 1443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); 1444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 144685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 0<state<8: framework will report truncation, nothing to do here */ 1447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode=state; 1452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 1455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs, 1456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(pArgs->converter->mode) { 1458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 8: 1459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return _UTF16BEGetNextUChar(pArgs, pErrorCode); 1460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 9: 1461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return _UTF16LEGetNextUChar(pArgs, pErrorCode); 1462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 1463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_GET_NEXT_UCHAR_USE_TO_U; 1464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _UTF16Impl = { 1468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_UTF16, 1469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16Open, 1474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16Reset, 1476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16ToUnicodeWithOffsets, 1478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16ToUnicodeWithOffsets, 1479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16PEFromUnicodeWithOffsets, 1480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16PEFromUnicodeWithOffsets, 1481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _UTF16GetNextUChar, 1482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ 148485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho _UTF16GetName, 1485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getNonSurrogateUnicodeSet 1488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _UTF16StaticData = { 1491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 1492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "UTF-16", 1493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1204, /* CCSID for BOM sensitive UTF-16 */ 149485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* BEGIN Android-changed */ 149585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ICU ticket#7226 */ 1496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_IBM, UCNV_UTF16, 2, 2, 149785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* BEGIN Android-changed */ 1498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if U_IS_BIG_ENDIAN 1499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xff, 0xfd, 0, 0 }, 2, 1500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 1501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xfd, 0xff, 0, 0 }, 2, 1502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FALSE, FALSE, 1504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _UTF16Data = { 1510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t) 0), 1511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_UTF16StaticData, FALSE, &_UTF16Impl, 1512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 1513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1516