16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org********************************************************************** 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2002-2012, International Business Machines 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org********************************************************************** 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: ucnv_u8.c 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2002jul01 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* UTF-8 converter implementation. Used to be in ucnv_utf.c. 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Also, CESU-8 implementation, see UTR 26. 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* The CESU-8 converter uses all the same functions as the 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* UTF-8 converter, with a branch for converting supplementary code points. 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_CONVERSION 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ucnv.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf8.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucnv_bld.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucnv_cnv.h" 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Prototypes --------------------------------------------------------------- */ 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Keep these here to make finicky compilers happy */ 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args, 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *err); 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args, 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *err); 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UTF-8 -------------------------------------------------------------------- */ 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UTF-8 Conversion DATA 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/ 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MAXIMUM_UCS2 0x0000FFFF 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MAXIMUM_UTF 0x0010FFFF 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MAXIMUM_UCS4 0x7FFFFFFF 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define HALF_SHIFT 10 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define HALF_BASE 0x0010000 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define HALF_MASK 0x3FF 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SURROGATE_HIGH_START 0xD800 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SURROGATE_HIGH_END 0xDBFF 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SURROGATE_LOW_START 0xDC00 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SURROGATE_LOW_END 0xDFFF 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* -SURROGATE_LOW_START + HALF_BASE */ 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SURROGATE_LOW_BASE 9216 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const uint32_t offsetsFromUTF8[7] = {0, 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* END OF UTF-8 Conversion DATA */ 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const int8_t bytesFromUTF8[256] = { 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Starting with Unicode 3.0.1: 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N]; 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * byte sequences with more than 4 bytes are illegal in UTF-8, 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * which is tested with impossible values for them 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const uint32_t 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgutf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 904dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.orgstatic UBool hasCESU8Data(const UConverter *cnv) 914dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org{ 924dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org#if UCONFIG_NO_NON_HTML5_CONVERSION 934dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org return FALSE; 944dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org#else 954dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org return (UBool)(cnv->sharedData == &_CESU8Data); 964dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org#endif 974dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org} 984dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode * err) 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv = args->converter; 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const unsigned char *mySource = (unsigned char *) args->source; 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *myTarget = args->target; 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *targetLimit = args->targetLimit; 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unsigned char *toUBytes = cnv->toUBytes; 1084dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org UBool isCESU8 = hasCESU8Data(cnv); 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t ch, ch2 = 0; 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i, inBytes; 1114dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Restore size of current sequence */ 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cnv->toUnicodeStatus && myTarget < targetLimit) 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inBytes = cnv->mode; /* restore # of bytes to consume */ 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i = cnv->toULength; /* restore # of bytes consumed */ 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = 0; 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUnicodeStatus = 0; 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto morebytes; 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (mySource < sourceLimit && myTarget < targetLimit) 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = *(mySource++); 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch < 0x80) /* Simple case */ 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (UChar) ch; 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* store the first char */ 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toUBytes[0] = (char)ch; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */ 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i = 1; 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgmorebytes: 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (i < inBytes) 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mySource < sourceLimit) 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toUBytes[i] = (char) (ch2 = *mySource); 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!U8_IS_TRAIL(ch2)) 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; /* i < inBytes */ 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = (ch << 6) + ch2; 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++mySource; 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i++; 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* stores a partially calculated target*/ 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUnicodeStatus = ch; 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->mode = inBytes; 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = (int8_t) i; 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto donefornow; 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Remove the accumulated high bits */ 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch -= offsetsFromUTF8[inBytes]; 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use only trail bytes after a lead byte (checked above) 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use the right number of trail bytes for a given lead byte 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - encode a code point <= U+10ffff 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use the fewest possible number of bytes for their code points 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[]) 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * There are no irregular sequences any more. 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In CESU-8, only surrogates, not supplementary code points, are encoded directly. 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] && 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch))) 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch <= MAXIMUM_UCS2) 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* fits in 16 bits */ 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (UChar) ch; 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write out the surrogates */ 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch -= HALF_BASE; 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = (ch & HALF_MASK) + SURROGATE_LOW_START; 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (myTarget < targetLimit) 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (UChar)ch; 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Put in overflow buffer (not handled here) */ 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBuffer[0] = (UChar) ch; 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBufferLength = 1; 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = (int8_t)i; 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_ILLEGAL_CHAR_FOUND; 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdonefornow: 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* End of target buffer */ 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->target = myTarget; 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->source = (const char *) mySource; 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args, 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode * err) 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv = args->converter; 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const unsigned char *mySource = (unsigned char *) args->source; 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *myTarget = args->target; 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *myOffsets = args->offsets; 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offsetNum = 0; 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *targetLimit = args->targetLimit; 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unsigned char *toUBytes = cnv->toUBytes; 2384dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org UBool isCESU8 = hasCESU8Data(cnv); 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t ch, ch2 = 0; 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i, inBytes; 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Restore size of current sequence */ 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cnv->toUnicodeStatus && myTarget < targetLimit) 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inBytes = cnv->mode; /* restore # of bytes to consume */ 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i = cnv->toULength; /* restore # of bytes consumed */ 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = 0; 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUnicodeStatus = 0; 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto morebytes; 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (mySource < sourceLimit && myTarget < targetLimit) 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = *(mySource++); 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch < 0x80) /* Simple case */ 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (UChar) ch; 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myOffsets++) = offsetNum++; 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toUBytes[0] = (char)ch; 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inBytes = bytesFromUTF8[ch]; 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i = 1; 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgmorebytes: 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (i < inBytes) 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mySource < sourceLimit) 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toUBytes[i] = (char) (ch2 = *mySource); 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!U8_IS_TRAIL(ch2)) 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; /* i < inBytes */ 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = (ch << 6) + ch2; 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++mySource; 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i++; 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUnicodeStatus = ch; 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->mode = inBytes; 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = (int8_t)i; 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto donefornow; 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Remove the accumulated high bits */ 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch -= offsetsFromUTF8[inBytes]; 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use only trail bytes after a lead byte (checked above) 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use the right number of trail bytes for a given lead byte 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - encode a code point <= U+10ffff 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use the fewest possible number of bytes for their code points 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[]) 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * There are no irregular sequences any more. 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In CESU-8, only surrogates, not supplementary code points, are encoded directly. 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] && 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch))) 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch <= MAXIMUM_UCS2) 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* fits in 16 bits */ 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (UChar) ch; 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myOffsets++) = offsetNum; 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write out the surrogates */ 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch -= HALF_BASE; 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myOffsets++) = offsetNum; 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = (ch & HALF_MASK) + SURROGATE_LOW_START; 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (myTarget < targetLimit) 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (UChar)ch; 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myOffsets++) = offsetNum; 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBuffer[0] = (UChar) ch; 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBufferLength = 1; 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsetNum += i; 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = (int8_t)i; 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_ILLEGAL_CHAR_FOUND; 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdonefornow: 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { /* End of target buffer */ 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->target = myTarget; 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->source = (const char *) mySource; 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->offsets = myOffsets; 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args, 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode * err) 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv = args->converter; 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *mySource = args->source; 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *sourceLimit = args->sourceLimit; 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *myTarget = (uint8_t *) args->target; 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *targetLimit = (uint8_t *) args->targetLimit; 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *tempPtr; 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 ch; 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t tempBuf[4]; 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t indexToWrite; 3694dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org UBool isNotCESU8 = !hasCESU8Data(cnv); 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cnv->fromUChar32 && myTarget < targetLimit) 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = cnv->fromUChar32; 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32 = 0; 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto lowsurrogate; 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (mySource < sourceLimit && myTarget < targetLimit) 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = *(mySource++); 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch < 0x80) /* Single byte */ 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (uint8_t) ch; 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (ch < 0x800) /* Double byte */ 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (myTarget < targetLimit) 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBufferLength = 1; 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Check for surrogates */ 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE(ch) && isNotCESU8) { 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orglowsurrogate: 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mySource < sourceLimit) { 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test both code units */ 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert and consume this supplementary code point */ 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch=U16_GET_SUPPLEMENTARY(ch, *mySource); 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++mySource; 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* exit this condition tree */ 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is an unpaired trail or lead code unit */ 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32 = ch; 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_ILLEGAL_CHAR_FOUND; 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no more input */ 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32 = ch; 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Do we write the buffer directly for speed, 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org or do we have to be careful about target buffer space? */ 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch <= MAXIMUM_UCS2) { 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexToWrite = 2; 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexToWrite = 3; 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (tempPtr == myTarget) { 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* There was enough space to write the codepoint directly. */ 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org myTarget += (indexToWrite + 1); 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* We might run out of room soon. Write it slowly. */ 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (myTarget < targetLimit) { 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = *tempPtr; 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->target = (char *) myTarget; 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->source = mySource; 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode * err) 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv = args->converter; 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *mySource = args->source; 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *myOffsets = args->offsets; 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *sourceLimit = args->sourceLimit; 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *myTarget = (uint8_t *) args->target; 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *targetLimit = (uint8_t *) args->targetLimit; 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *tempPtr; 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 ch; 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offsetNum, nextSourceIndex; 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t indexToWrite; 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t tempBuf[4]; 4854dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org UBool isNotCESU8 = !hasCESU8Data(cnv); 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cnv->fromUChar32 && myTarget < targetLimit) 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = cnv->fromUChar32; 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32 = 0; 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsetNum = -1; 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex = 0; 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto lowsurrogate; 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsetNum = 0; 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (mySource < sourceLimit && myTarget < targetLimit) 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = *(mySource++); 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch < 0x80) /* Single byte */ 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myOffsets++) = offsetNum++; 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (char) ch; 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else if (ch < 0x800) /* Double byte */ 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myOffsets++) = offsetNum; 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (myTarget < targetLimit) 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myOffsets++) = offsetNum++; 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBufferLength = 1; 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Check for surrogates */ 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex = offsetNum + 1; 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE(ch) && isNotCESU8) { 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orglowsurrogate: 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mySource < sourceLimit) { 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test both code units */ 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert and consume this supplementary code point */ 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch=U16_GET_SUPPLEMENTARY(ch, *mySource); 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++mySource; 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* exit this condition tree */ 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is an unpaired trail or lead code unit */ 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32 = ch; 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_ILLEGAL_CHAR_FOUND; 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no more input */ 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32 = ch; 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Do we write the buffer directly for speed, 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org or do we have to be careful about target buffer space? */ 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ch <= MAXIMUM_UCS2) { 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexToWrite = 2; 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexToWrite = 3; 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (tempPtr == myTarget) { 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* There was enough space to write the codepoint directly. */ 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org myTarget += (indexToWrite + 1); 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org myOffsets[0] = offsetNum; 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org myOffsets[1] = offsetNum; 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org myOffsets[2] = offsetNum; 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (indexToWrite >= 3) { 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org myOffsets[3] = offsetNum; 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org myOffsets += (indexToWrite + 1); 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* We might run out of room soon. Write it slowly. */ 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (myTarget < targetLimit) 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myOffsets++) = offsetNum; 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *(myTarget++) = *tempPtr; 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsetNum = nextSourceIndex; 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_BUFFER_OVERFLOW_ERROR; 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->target = (char *) myTarget; 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->source = mySource; 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->offsets = myOffsets; 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *err) { 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *sourceInitial; 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *source; 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t extraBytesToWrite; 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t myByte; 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 ch; 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t i, isLegalSequence; 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */ 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv = args->converter; 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceInitial = source = (const uint8_t *)args->source; 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (source >= (const uint8_t *)args->sourceLimit) 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no input */ 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_INDEX_OUTOFBOUNDS_ERROR; 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org myByte = (uint8_t)*(source++); 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (myByte < 0x80) 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->source = (const char *)source; 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UChar32)myByte; 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte]; 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (extraBytesToWrite == 0) { 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUBytes[0] = myByte; 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = 1; 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_ILLEGAL_CHAR_FOUND; 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->source = (const char *)source; 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /*The byte sequence is longer than the buffer area passed*/ 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit) 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* check if all of the remaining bytes are trail bytes */ 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUBytes[0] = myByte; 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i = 1; 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_TRUNCATED_CHAR_FOUND; 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source < (const uint8_t *)args->sourceLimit) { 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U8_IS_TRAIL(myByte = *source)) { 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUBytes[i++] = myByte; 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* error even before we run out of input */ 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_ILLEGAL_CHAR_FOUND; 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = i; 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->source = (const char *)source; 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isLegalSequence = 1; 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch = myByte << 6; 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(extraBytesToWrite) 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* note: code falls through cases! (sic)*/ 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 6: 6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch += (myByte = *source); 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch <<= 6; 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!U8_IS_TRAIL(myByte)) 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isLegalSequence = 0; 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 5: /*fall through*/ 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch += (myByte = *source); 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch <<= 6; 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!U8_IS_TRAIL(myByte)) 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isLegalSequence = 0; 6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 4: /*fall through*/ 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch += (myByte = *source); 6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch <<= 6; 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!U8_IS_TRAIL(myByte)) 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isLegalSequence = 0; 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 3: /*fall through*/ 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch += (myByte = *source); 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch <<= 6; 7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!U8_IS_TRAIL(myByte)) 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isLegalSequence = 0; 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: /*fall through*/ 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch += (myByte = *source); 7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!U8_IS_TRAIL(myByte)) 7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isLegalSequence = 0; 7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }; 7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ch -= offsetsFromUTF8[extraBytesToWrite]; 7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args->source = (const char *)source; 7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Legal UTF-8 byte sequences in Unicode 3.0.1 and up: 7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use only trail bytes after a lead byte (checked above) 7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use the right number of trail bytes for a given lead byte 7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - encode a code point <= U+10ffff 7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use the fewest possible number of bytes for their code points 7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[]) 7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. 7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * There are no irregular sequences any more. 7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isLegalSequence && 7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (uint32_t)ch <= MAXIMUM_UTF && 7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (uint32_t)ch >= utf8_minChar32[extraBytesToWrite] && 7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org !U_IS_SURROGATE(ch) 7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ch; /* return the code point */ 7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i = 0; sourceInitial < source; ++i) { 7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUBytes[i] = *sourceInitial++; 7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength = i; 7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err = U_ILLEGAL_CHAR_FOUND; 7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */ 7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */ 7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar32 7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgutf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 }; 7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */ 7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar32 7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgutf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; 7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */ 7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, 7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterToUnicodeArgs *pToUArgs, 7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *utf8; 7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *source, *sourceLimit; 7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *target; 7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t targetCapacity; 7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t count; 7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t oldToULength, toULength, toULimit; 7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t b, t1, t2; 7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8=pToUArgs->converter; 7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=(uint8_t *)pToUArgs->source; 7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=(uint8_t *)pToUArgs->sourceLimit; 7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=(uint8_t *)pFromUArgs->target; 7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); 7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the converter state from the UTF-8 UConverter */ 7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar32)utf8->toUnicodeStatus; 7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c!=0) { 7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength=oldToULength=utf8->toULength; 7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULimit=(int8_t)utf8->mode; 7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength=oldToULength=toULimit=0; 7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count=(int32_t)(sourceLimit-source)+oldToULength; 7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(count<toULimit) { 7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Not enough input to complete the partial character. 7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Jump to moreBytes below - it will not output to target. 7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(targetCapacity<toULimit) { 7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Not enough target capacity to output the partial character. 8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Let the standard converter handle this. 8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_USING_DEFAULT_WARNING; 8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Use a single counter for source and target, counting the minimum of 8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the source length and the target capacity. 8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * As a result, the source length is checked only once per multi-byte 8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * character instead of twice. 8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Make sure that the last byte sequence is complete, or else 8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * stop just before it. 8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (The longest legal byte sequence has 3 trail bytes.) 8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Count oldToULength (number of source bytes from a previous buffer) 8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * into the source length but reduce the source index by toULimit 8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * while going back over trail bytes in order to not go back into 8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the bytes that will be read for finishing a partial 8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * sequence from the previous buffer. 8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Let the standard converter handle edge cases. 8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(count>targetCapacity) { 8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count=targetCapacity; 8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i=0; 8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(i<3 && i<(count-toULimit)) { 8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=source[count-oldToULength-i-1]; 8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U8_IS_TRAIL(b)) { 8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i; 8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<U8_COUNT_TRAIL_BYTES(b)) { 8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* stop converting before the lead byte if there are not enough trail bytes for it */ 8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count-=i+1; 8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c!=0) { 8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUnicodeStatus=0; 8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=0; 8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto moreBytes; 8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* See note in ucnv_SBCSFromUTF8() about this goto. */ 8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(count>0) { 8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=*source++; 8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((int8_t)b>=0) { 8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert ASCII */ 8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=b; 8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --count; 8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(b>0xe0) { 8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( /* handle U+1000..U+D7FF inline */ 8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) || 8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (b==0xed && (t1 <= 0x9f))) && 8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (t2=source[1]) >= 0x80 && t2 <= 0xbf 8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source+=2; 8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=b; 8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=t1; 8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=t2; 8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count-=3; 8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(b<0xe0) { 8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( /* handle U+0080..U+07FF inline */ 8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b>=0xc2 && 8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (t1=*source) >= 0x80 && t1 <= 0xbf 8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=b; 8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=t1; 8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count-=2; 8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(b==0xe0) { 8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( /* handle U+0800..U+0FFF inline */ 8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (t1=source[0]) >= 0xa0 && t1 <= 0xbf && 8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (t2=source[1]) >= 0x80 && t2 <= 0xbf 8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source+=2; 8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=b; 8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=t1; 8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=t2; 8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count-=3; 8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* handle "complicated" and error cases, and continuing partial characters */ 8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oldToULength=0; 8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength=1; 8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULimit=U8_COUNT_TRAIL_BYTES(b)+1; 9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=b; 9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgmoreBytes: 9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(toULength<toULimit) { 9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(source<sourceLimit) { 9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=*source; 9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U8_IS_TRAIL(b)) { 9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++toULength; 9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(c<<6)+b; 9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; /* sequence too short, stop with toULength<toULimit */ 9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */ 9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source-=(toULength-oldToULength); 9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(oldToULength<toULength) { 9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUBytes[oldToULength++]=*source++; 9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUnicodeStatus=c; 9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=toULength; 9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->mode=toULimit; 9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pToUArgs->source=(char *)source; 9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pFromUArgs->target=(char *)target; 9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( toULength==toULimit && /* consumed all trail bytes */ 9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (toULength==3 || toULength==2) && /* BMP */ 9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && 9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c<=0xd7ff || 0xe000<=c) /* not a surrogate */ 9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* legal byte sequence for BMP code point */ 9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if( 9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength==toULimit && toULength==4 && 9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff) 9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* legal byte sequence for supplementary code point */ 9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* error handling: illegal UTF-8 byte sequence */ 9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source-=(toULength-oldToULength); 9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(oldToULength<toULength) { 9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUBytes[oldToULength++]=*source++; 9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=toULength; 9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pToUArgs->source=(char *)source; 9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pFromUArgs->target=(char *)target; 9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* copy the legal byte sequence to the target */ 9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t i; 9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<oldToULength; ++i) { 9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=utf8->toUBytes[i]; 9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source-=(toULength-oldToULength); 9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(; i<toULength; ++i) { 9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=*source++; 9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count-=toULength; 9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(*pErrorCode) && source<sourceLimit) { 9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(target==(const uint8_t *)pFromUArgs->targetLimit) { 9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=*source; 9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULimit=U8_COUNT_TRAIL_BYTES(b)+1; 9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(toULimit>(sourceLimit-source)) { 9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* collect a truncated byte sequence */ 9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength=0; 9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=b; 9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;) { 9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUBytes[toULength++]=b; 9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(++source==sourceLimit) { 9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* partial byte sequence at end of source */ 9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUnicodeStatus=c; 9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=toULength; 9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->mode=toULimit; 9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(!U8_IS_TRAIL(b=*source)) { 9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* lead byte in trail byte position */ 9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=toULength; 9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(c<<6)+b; 9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* partial-sequence target overflow: fall back to the pivoting implementation */ 9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_USING_DEFAULT_WARNING; 9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointers */ 10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pToUArgs->source=(char *)source; 10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pFromUArgs->target=(char *)target; 10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UTF-8 converter data ----------------------------------------------------- */ 10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _UTF8Impl={ 10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_UTF8, 10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUnicode_UTF8, 10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUnicode_UTF8_OFFSETS_LOGIC, 10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_fromUnicode_UTF8, 10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, 10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_getNextUChar_UTF8, 10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_getNonSurrogateUnicodeSet, 10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_UTF8FromUTF8, 10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_UTF8FromUTF8 10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* The 1208 CCSID refers to any version of Unicode of UTF-8 */ 10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterStaticData _UTF8StaticData={ 10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sizeof(UConverterStaticData), 10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org "UTF-8", 10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1208, UCNV_IBM, UCNV_UTF8, 10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ 10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, 10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UConverterSharedData _UTF8Data={ 10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sizeof(UConverterSharedData), ~((uint32_t) 0), 10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, NULL, &_UTF8StaticData, FALSE, &_UTF8Impl, 10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0 10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* CESU-8 converter data ---------------------------------------------------- */ 10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _CESU8Impl={ 10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_CESU8, 10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUnicode_UTF8, 10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUnicode_UTF8_OFFSETS_LOGIC, 10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_fromUnicode_UTF8, 10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, 10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_getCompleteUnicodeSet 10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterStaticData _CESU8StaticData={ 10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sizeof(UConverterStaticData), 10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org "CESU-8", 10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9400, /* CCSID for CESU-8 */ 10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_UNKNOWN, UCNV_CESU8, 1, 3, 10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, 10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UConverterSharedData _CESU8Data={ 10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sizeof(UConverterSharedData), ~((uint32_t) 0), 10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl, 10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0 10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 1096