1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 2002-2007, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: ucnv_u8.c 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2002jul01 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* UTF-8 converter implementation. Used to be in ucnv_utf.c. 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Also, CESU-8 implementation, see UTR 26. 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* The CESU-8 converter uses all the same functions as the 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* UTF-8 converter, with a branch for converting supplementary code points. 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Prototypes --------------------------------------------------------------- */ 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Keep these here to make finicky compilers happy */ 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args, 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *err); 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args, 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *err); 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-8 -------------------------------------------------------------------- */ 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-8 Conversion DATA 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/ 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAXIMUM_UCS2 0x0000FFFF 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAXIMUM_UTF 0x0010FFFF 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAXIMUM_UCS4 0x7FFFFFFF 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define HALF_SHIFT 10 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define HALF_BASE 0x0010000 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define HALF_MASK 0x3FF 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SURROGATE_HIGH_START 0xD800 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SURROGATE_HIGH_END 0xDBFF 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SURROGATE_LOW_START 0xDC00 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SURROGATE_LOW_END 0xDFFF 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* -SURROGATE_LOW_START + HALF_BASE */ 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SURROGATE_LOW_BASE 9216 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const uint32_t offsetsFromUTF8[7] = {0, 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* END OF UTF-8 Conversion DATA */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const int8_t bytesFromUTF8[256] = { 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Starting with Unicode 3.0.1: 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N]; 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * byte sequences with more than 4 bytes are illegal in UTF-8, 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * which is tested with impossible values for them 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const uint32_t 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode * err) 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const unsigned char *mySource = (unsigned char *) args->source; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *myTarget = args->target; 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *targetLimit = args->targetLimit; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned char *toUBytes = cnv->toUBytes; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t ch, ch2 = 0; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, inBytes; 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Restore size of current sequence */ 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cnv->toUnicodeStatus && myTarget < targetLimit) 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inBytes = cnv->mode; /* restore # of bytes to consume */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = cnv->toULength; /* restore # of bytes consumed */ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = 0; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus = 0; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto morebytes; 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (mySource < sourceLimit && myTarget < targetLimit) 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = *(mySource++); 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch < 0x80) /* Simple case */ 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (UChar) ch; 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the first char */ 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUBytes[0] = (char)ch; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */ 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = 1; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumorebytes: 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (i < inBytes) 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mySource < sourceLimit) 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUBytes[i] = (char) (ch2 = *mySource); 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!UTF8_IS_TRAIL(ch2)) 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* i < inBytes */ 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = (ch << 6) + ch2; 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++mySource; 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i++; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* stores a partially calculated target*/ 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus = ch; 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode = inBytes; 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = (int8_t) i; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto donefornow; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Remove the accumulated high bits */ 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch -= offsetsFromUTF8[inBytes]; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use only trail bytes after a lead byte (checked above) 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use the right number of trail bytes for a given lead byte 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - encode a code point <= U+10ffff 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use the fewest possible number of bytes for their code points 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[]) 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * There are no irregular sequences any more. 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In CESU-8, only surrogates, not supplementary code points, are encoded directly. 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] && 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch))) 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch <= MAXIMUM_UCS2) 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fits in 16 bits */ 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (UChar) ch; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write out the surrogates */ 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch -= HALF_BASE; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = (ch & HALF_MASK) + SURROGATE_LOW_START; 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (myTarget < targetLimit) 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (UChar)ch; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Put in overflow buffer (not handled here) */ 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0] = (UChar) ch; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength = 1; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = (int8_t)i; 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querudonefornow: 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* End of target buffer */ 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = (const char *) mySource; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args, 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode * err) 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const unsigned char *mySource = (unsigned char *) args->source; 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *myTarget = args->target; 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *myOffsets = args->offsets; 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offsetNum = 0; 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *targetLimit = args->targetLimit; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru unsigned char *toUBytes = cnv->toUBytes; 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t ch, ch2 = 0; 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, inBytes; 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Restore size of current sequence */ 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cnv->toUnicodeStatus && myTarget < targetLimit) 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inBytes = cnv->mode; /* restore # of bytes to consume */ 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = cnv->toULength; /* restore # of bytes consumed */ 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = 0; 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus = 0; 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto morebytes; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (mySource < sourceLimit && myTarget < targetLimit) 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = *(mySource++); 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch < 0x80) /* Simple case */ 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (UChar) ch; 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myOffsets++) = offsetNum++; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUBytes[0] = (char)ch; 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inBytes = bytesFromUTF8[ch]; 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = 1; 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumorebytes: 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (i < inBytes) 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mySource < sourceLimit) 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUBytes[i] = (char) (ch2 = *mySource); 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!UTF8_IS_TRAIL(ch2)) 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* i < inBytes */ 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = (ch << 6) + ch2; 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++mySource; 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i++; 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUnicodeStatus = ch; 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->mode = inBytes; 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = (int8_t)i; 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto donefornow; 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Remove the accumulated high bits */ 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch -= offsetsFromUTF8[inBytes]; 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use only trail bytes after a lead byte (checked above) 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use the right number of trail bytes for a given lead byte 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - encode a code point <= U+10ffff 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use the fewest possible number of bytes for their code points 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[]) 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * There are no irregular sequences any more. 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * In CESU-8, only surrogates, not supplementary code points, are encoded directly. 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] && 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch))) 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch <= MAXIMUM_UCS2) 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fits in 16 bits */ 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (UChar) ch; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myOffsets++) = offsetNum; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write out the surrogates */ 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch -= HALF_BASE; 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myOffsets++) = offsetNum; 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = (ch & HALF_MASK) + SURROGATE_LOW_START; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (myTarget < targetLimit) 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (UChar)ch; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myOffsets++) = offsetNum; 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBuffer[0] = (UChar) ch; 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->UCharErrorBufferLength = 1; 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsetNum += i; 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = (int8_t)i; 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querudonefornow: 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { /* End of target buffer */ 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = myTarget; 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = (const char *) mySource; 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets = myOffsets; 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args, 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode * err) 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *mySource = args->source; 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *sourceLimit = args->sourceLimit; 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *myTarget = (uint8_t *) args->target; 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *targetLimit = (uint8_t *) args->targetLimit; 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *tempPtr; 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t tempBuf[4]; 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t indexToWrite; 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cnv->fromUChar32 && myTarget < targetLimit) 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = cnv->fromUChar32; 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32 = 0; 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto lowsurrogate; 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (mySource < sourceLimit && myTarget < targetLimit) 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = *(mySource++); 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch < 0x80) /* Single byte */ 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (uint8_t) ch; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if (ch < 0x800) /* Double byte */ 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (myTarget < targetLimit) 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength = 1; 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Check for surrogates */ 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE(ch) && isNotCESU8) { 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querulowsurrogate: 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mySource < sourceLimit) { 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test both code units */ 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) { 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert and consume this supplementary code point */ 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch=UTF16_GET_PAIR_VALUE(ch, *mySource); 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++mySource; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unpaired trail or lead code unit */ 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32 = ch; 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32 = ch; 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Do we write the buffer directly for speed, 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru or do we have to be careful about target buffer space? */ 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch <= MAXIMUM_UCS2) { 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexToWrite = 2; 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexToWrite = 3; 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tempPtr == myTarget) { 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* There was enough space to write the codepoint directly. */ 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myTarget += (indexToWrite + 1); 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* We might run out of room soon. Write it slowly. */ 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (myTarget < targetLimit) { 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = *tempPtr; 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = (char *) myTarget; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode * err) 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv = args->converter; 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *mySource = args->source; 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *myOffsets = args->offsets; 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *sourceLimit = args->sourceLimit; 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *myTarget = (uint8_t *) args->target; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *targetLimit = (uint8_t *) args->targetLimit; 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *tempPtr; 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offsetNum, nextSourceIndex; 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t indexToWrite; 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t tempBuf[4]; 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cnv->fromUChar32 && myTarget < targetLimit) 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = cnv->fromUChar32; 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32 = 0; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsetNum = -1; 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex = 0; 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto lowsurrogate; 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsetNum = 0; 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (mySource < sourceLimit && myTarget < targetLimit) 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = *(mySource++); 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch < 0x80) /* Single byte */ 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myOffsets++) = offsetNum++; 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (char) ch; 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else if (ch < 0x800) /* Double byte */ 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myOffsets++) = offsetNum; 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (myTarget < targetLimit) 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myOffsets++) = offsetNum++; 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBufferLength = 1; 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Check for surrogates */ 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextSourceIndex = offsetNum + 1; 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE(ch) && isNotCESU8) { 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querulowsurrogate: 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mySource < sourceLimit) { 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test both code units */ 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) { 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert and consume this supplementary code point */ 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch=UTF16_GET_PAIR_VALUE(ch, *mySource); 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++mySource; 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++nextSourceIndex; 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* exit this condition tree */ 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* this is an unpaired trail or lead code unit */ 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* callback(illegal) */ 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32 = ch; 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no more input */ 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32 = ch; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Do we write the buffer directly for speed, 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru or do we have to be careful about target buffer space? */ 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch <= MAXIMUM_UCS2) { 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexToWrite = 2; 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexToWrite = 3; 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tempPtr == myTarget) { 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* There was enough space to write the codepoint directly. */ 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myTarget += (indexToWrite + 1); 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myOffsets[0] = offsetNum; 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myOffsets[1] = offsetNum; 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myOffsets[2] = offsetNum; 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (indexToWrite >= 3) { 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myOffsets[3] = offsetNum; 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myOffsets += (indexToWrite + 1); 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* We might run out of room soon. Write it slowly. */ 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (myTarget < targetLimit) 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myOffsets++) = offsetNum; 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *(myTarget++) = *tempPtr; 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsetNum = nextSourceIndex; 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_BUFFER_OVERFLOW_ERROR; 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->target = (char *) myTarget; 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = mySource; 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->offsets = myOffsets; 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *err) { 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *cnv; 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *sourceInitial; 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source; 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t extraBytesToWrite; 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t myByte; 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t i, isLegalSequence; 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */ 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv = args->converter; 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceInitial = source = (const uint8_t *)args->source; 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (source >= (const uint8_t *)args->sourceLimit) 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no input */ 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_INDEX_OUTOFBOUNDS_ERROR; 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru myByte = (uint8_t)*(source++); 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (myByte < 0x80) 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = (const char *)source; 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UChar32)myByte; 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte]; 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (extraBytesToWrite == 0) { 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0] = myByte; 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = 1; 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = (const char *)source; 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /*The byte sequence is longer than the buffer area passed*/ 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit) 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check if all of the remaining bytes are trail bytes */ 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[0] = myByte; 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = 1; 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_TRUNCATED_CHAR_FOUND; 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(source < (const uint8_t *)args->sourceLimit) { 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U8_IS_TRAIL(myByte = *source)) { 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[i++] = myByte; 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* error even before we run out of input */ 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = i; 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = (const char *)source; 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isLegalSequence = 1; 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = myByte << 6; 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(extraBytesToWrite) 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* note: code falls through cases! (sic)*/ 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 6: 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch += (myByte = *source); 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch <<= 6; 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!UTF8_IS_TRAIL(myByte)) 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isLegalSequence = 0; 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 5: 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch += (myByte = *source); 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch <<= 6; 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!UTF8_IS_TRAIL(myByte)) 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isLegalSequence = 0; 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 4: 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch += (myByte = *source); 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch <<= 6; 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!UTF8_IS_TRAIL(myByte)) 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isLegalSequence = 0; 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch += (myByte = *source); 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch <<= 6; 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!UTF8_IS_TRAIL(myByte)) 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isLegalSequence = 0; 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch += (myByte = *source); 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!UTF8_IS_TRAIL(myByte)) 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isLegalSequence = 0; 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch -= offsetsFromUTF8[extraBytesToWrite]; 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru args->source = (const char *)source; 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use only trail bytes after a lead byte (checked above) 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use the right number of trail bytes for a given lead byte 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - encode a code point <= U+10ffff 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use the fewest possible number of bytes for their code points 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[]) 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * There are no irregular sequences any more. 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (isLegalSequence && 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t)ch <= MAXIMUM_UTF && 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t)ch >= utf8_minChar32[extraBytesToWrite] && 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !U_IS_SURROGATE(ch) 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ch; /* return the code point */ 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i = 0; sourceInitial < source; ++i) { 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toUBytes[i] = *sourceInitial++; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength = i; 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err = U_ILLEGAL_CHAR_FOUND; 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */ 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */ 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar32 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 }; 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */ 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar32 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */ 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUnicodeArgs *pToUArgs, 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter *utf8, *cnv; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *source, *sourceLimit; 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *target; 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCapacity; 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t count; 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t oldToULength, toULength, toULimit; 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t b, t1, t2; 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set up the local pointers */ 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8=pToUArgs->converter; 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv=pFromUArgs->converter; 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source=(uint8_t *)pToUArgs->source; 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit=(uint8_t *)pToUArgs->sourceLimit; 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=(uint8_t *)pFromUArgs->target; 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the converter state from the UTF-8 UConverter */ 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=(UChar32)utf8->toUnicodeStatus; 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toULength=oldToULength=utf8->toULength; 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toULimit=(int8_t)utf8->mode; 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toULength=oldToULength=toULimit=0; 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=(int32_t)(sourceLimit-source)+oldToULength; 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count<toULimit) { 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Not enough input to complete the partial character. 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Jump to moreBytes below - it will not output to target. 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(targetCapacity<toULimit) { 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Not enough target capacity to output the partial character. 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Let the standard converter handle this. 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_USING_DEFAULT_WARNING; 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Use a single counter for source and target, counting the minimum of 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the source length and the target capacity. 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * As a result, the source length is checked only once per multi-byte 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * character instead of twice. 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Make sure that the last byte sequence is complete, or else 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stop just before it. 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (The longest legal byte sequence has 3 trail bytes.) 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Count oldToULength (number of source bytes from a previous buffer) 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * into the source length but reduce the source index by toULimit 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * while going back over trail bytes in order to not go back into 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the bytes that will be read for finishing a partial 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sequence from the previous buffer. 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Let the standard converter handle edge cases. 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>targetCapacity) { 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=targetCapacity; 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=0; 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(i<3 && i<(count-toULimit)) { 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=source[count-oldToULength-i-1]; 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U8_IS_TRAIL(b)) { 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++i; 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<utf8_countTrailBytes[b]) { 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* stop converting before the lead byte if there are not enough trail bytes for it */ 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count-=i+1; 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c!=0) { 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toUnicodeStatus=0; 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toULength=0; 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru goto moreBytes; 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* See note in ucnv_SBCSFromUTF8() about this goto. */ 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* conversion loop */ 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(count>0) { 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source++; 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((int8_t)b>=0) { 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert ASCII */ 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=b; 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --count; 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(b>0xe0) { 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( /* handle U+1000..U+D7FF inline */ 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) || 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (b==0xed && (t1 <= 0x9f))) && 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (t2=source[1]) >= 0x80 && t2 <= 0xbf 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=b; 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=t1; 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=t2; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count-=3; 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b<0xe0) { 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b>=0xc2 && 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (t1=*source) >= 0x80 && t1 <= 0xbf 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=b; 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=t1; 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count-=2; 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(b==0xe0) { 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( /* handle U+0800..U+0FFF inline */ 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (t1=source[0]) >= 0xa0 && t1 <= 0xbf && 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (t2=source[1]) >= 0x80 && t2 <= 0xbf 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source+=2; 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=b; 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=t1; 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=t2; 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count-=3; 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* handle "complicated" and error cases, and continuing partial characters */ 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru oldToULength=0; 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toULength=1; 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toULimit=utf8_countTrailBytes[b]+1; 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=b; 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerumoreBytes: 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(toULength<toULimit) { 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(source<sourceLimit) { 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source; 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U8_IS_TRAIL(b)) { 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++source; 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++toULength; 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=(c<<6)+b; 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* sequence too short, stop with toULength<toULimit */ 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */ 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source-=(toULength-oldToULength); 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(oldToULength<toULength) { 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toUBytes[oldToULength++]=*source++; 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toUnicodeStatus=c; 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toULength=toULength; 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->mode=toULimit; 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToUArgs->source=(char *)source; 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromUArgs->target=(char *)target; 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( toULength==toULimit && /* consumed all trail bytes */ 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (toULength==3 || toULength==2) && /* BMP */ 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (c<=0xd7ff || 0xe000<=c) /* not a surrogate */ 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* legal byte sequence for BMP code point */ 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if( 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toULength==toULimit && toULength==4 && 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff) 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* legal byte sequence for supplementary code point */ 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* error handling: illegal UTF-8 byte sequence */ 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source-=(toULength-oldToULength); 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(oldToULength<toULength) { 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toUBytes[oldToULength++]=*source++; 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toULength=toULength; 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToUArgs->source=(char *)source; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromUArgs->target=(char *)target; 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy the legal byte sequence to the target */ 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t i; 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<oldToULength; ++i) { 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=utf8->toUBytes[i]; 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source-=(toULength-oldToULength); 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(; i<toULength; ++i) { 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *target++=*source++; 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count-=toULength; 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode) && source<sourceLimit) { 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(target==(const uint8_t *)pFromUArgs->targetLimit) { 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=*source; 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toULimit=utf8_countTrailBytes[b]+1; 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(toULimit>(sourceLimit-source)) { 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* collect a truncated byte sequence */ 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toULength=0; 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=b; 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toUBytes[toULength++]=b; 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++source==sourceLimit) { 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* partial byte sequence at end of source */ 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toUnicodeStatus=c; 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toULength=toULength; 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->mode=toULimit; 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(!U8_IS_TRAIL(b=*source)) { 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* lead byte in trail byte position */ 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utf8->toULength=toULength; 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_CHAR_FOUND; 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=(c<<6)+b; 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* partial-sequence target overflow: fall back to the pivoting implementation */ 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_USING_DEFAULT_WARNING; 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write back the updated pointers */ 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pToUArgs->source=(char *)source; 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pFromUArgs->target=(char *)target; 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-8 converter data ----------------------------------------------------- */ 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _UTF8Impl={ 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_UTF8, 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode_UTF8, 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode_UTF8_OFFSETS_LOGIC, 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode_UTF8, 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getNextUChar_UTF8, 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getNonSurrogateUnicodeSet, 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_UTF8FromUTF8, 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_UTF8FromUTF8 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* The 1208 CCSID refers to any version of Unicode of UTF-8 */ 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _UTF8StaticData={ 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "UTF-8", 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1208, UCNV_IBM, UCNV_UTF8, 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _UTF8Data={ 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t) 0), 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_UTF8StaticData, FALSE, &_UTF8Impl, 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* CESU-8 converter data ---------------------------------------------------- */ 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _CESU8Impl={ 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_CESU8, 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode_UTF8, 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode_UTF8_OFFSETS_LOGIC, 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode_UTF8, 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getCompleteUnicodeSet 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _CESU8StaticData={ 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterStaticData), 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "CESU-8", 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 9400, /* CCSID for CESU-8 */ 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_UNKNOWN, UCNV_CESU8, 1, 3, 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _CESU8Data={ 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UConverterSharedData), ~((uint32_t) 0), 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl, 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1085