/*
**********************************************************************
*   Copyright (C) 2002-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnv_u8.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002jul01
*   created by: Markus W. Scherer
*
*   UTF-8 converter implementation. Used to be in ucnv_utf.c.
*
*   Also, CESU-8 implementation, see UTR 26.
*   The CESU-8 converter uses all the same functions as the
*   UTF-8 converter, with a branch for converting supplementary code points.
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/ucnv.h"
#include "unicode/utf.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "cmemory.h"

336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Prototypes --------------------------------------------------------------- */
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Keep these here to make finicky compilers happy */
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args,
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                           UErrorCode *err);
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args,
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                        UErrorCode *err);
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/* UTF-8 -------------------------------------------------------------------- */

/* UTF-8 Conversion DATA
 *   for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9
 */
/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/

/* Largest value written as a single UChar by the to-Unicode functions below. */
#define MAXIMUM_UCS2            0x0000FFFF
/* Largest code point accepted by the to-Unicode functions below. */
#define MAXIMUM_UTF             0x0010FFFF
#define MAXIMUM_UCS4            0x7FFFFFFF

/*
 * Surrogate-pair arithmetic: a code point above MAXIMUM_UCS2 has HALF_BASE
 * subtracted, then its upper bits (>> HALF_SHIFT) are added to
 * SURROGATE_HIGH_START and its lower bits (& HALF_MASK) are added to
 * SURROGATE_LOW_START.
 */
#define HALF_SHIFT              10
#define HALF_BASE               0x0010000
#define HALF_MASK               0x3FF
#define SURROGATE_HIGH_START    0xD800
#define SURROGATE_HIGH_END      0xDBFF
#define SURROGATE_LOW_START     0xDC00
#define SURROGATE_LOW_END       0xDFFF

/* -SURROGATE_LOW_START + HALF_BASE, i.e. 0x10000 - 0xDC00 */
#define SURROGATE_LOW_BASE      9216

/*
 * offsetsFromUTF8[n] is the value to subtract from a code point accumulated
 * as ch = (ch << 6) + nextByte over an n-byte sequence. It is the sum of the
 * lead-byte marker bits and the 0x80 marker of every trail byte, each shifted
 * to the position where the accumulation leaves it (modulo 2^32):
 *   n=2: (0xC0 << 6)  + 0x80
 *   n=3: (0xE0 << 12) + (0x80 << 6) + 0x80
 *   n=4: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
 * and so on. Index 0 is used for invalid lead bytes and subtracts nothing.
 */
static const uint32_t offsetsFromUTF8[7] = {0,
  (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
  (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080
};

/* END OF UTF-8 Conversion DATA */

/*
 * bytesFromUTF8[b] is the total sequence length announced by lead byte b.
 * 0 marks bytes that cannot start a sequence (0x80..0xBF, 0xFE, 0xFF).
 * Lengths 5 and 6 are still listed so that such sequences can be consumed
 * as a unit before being rejected via utf8_minChar32[].
 */
static const int8_t bytesFromUTF8[256] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00..0x0F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10..0x1F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20..0x2F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30..0x3F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40..0x4F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50..0x5F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60..0x6F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70..0x7F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80..0x8F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90..0x9F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0..0xAF */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xB0..0xBF */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xC0..0xCF */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xD0..0xDF */
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xE0..0xEF */
  4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0  /* 0xF0..0xFF */
};

/*
 * Starting with Unicode 3.0.1:
 * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N];
 * byte sequences with more than 4 bytes are illegal in UTF-8,
 * which is tested with impossible values for them
 * (0xffffffff for N=5 and N=6, which no code point <= 0x10ffff can reach).
 */
static const uint32_t
utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };

904dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.orgstatic UBool hasCESU8Data(const UConverter *cnv)
914dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org{
924dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org#if UCONFIG_NO_NON_HTML5_CONVERSION
934dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org    return FALSE;
944dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org#else
954dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org    return (UBool)(cnv->sharedData == &_CESU8Data);
964dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org#endif
974dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org}
984dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                  UErrorCode * err)
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter *cnv = args->converter;
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const unsigned char *mySource = (unsigned char *) args->source;
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *myTarget = args->target;
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *targetLimit = args->targetLimit;
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    unsigned char *toUBytes = cnv->toUBytes;
1084dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org    UBool isCESU8 = hasCESU8Data(cnv);
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t ch, ch2 = 0;
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i, inBytes;
1114dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Restore size of current sequence */
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (cnv->toUnicodeStatus && myTarget < targetLimit)
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        inBytes = cnv->mode;            /* restore # of bytes to consume */
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        i = cnv->toULength;             /* restore # of bytes consumed */
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toULength = 0;
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toUnicodeStatus = 0;
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto morebytes;
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (mySource < sourceLimit && myTarget < targetLimit)
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = *(mySource++);
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (ch < 0x80)        /* Simple case */
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myTarget++) = (UChar) ch;
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* store the first char */
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            toUBytes[0] = (char)ch;
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            i = 1;
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgmorebytes:
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            while (i < inBytes)
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (mySource < sourceLimit)
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    toUBytes[i] = (char) (ch2 = *mySource);
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (!U8_IS_TRAIL(ch2))
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    {
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break; /* i < inBytes */
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ch = (ch << 6) + ch2;
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++mySource;
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    i++;
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                else
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* stores a partially calculated target*/
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cnv->toUnicodeStatus = ch;
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cnv->mode = inBytes;
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cnv->toULength = (int8_t) i;
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    goto donefornow;
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* Remove the accumulated high bits */
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ch -= offsetsFromUTF8[inBytes];
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - use only trail bytes after a lead byte (checked above)
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - use the right number of trail bytes for a given lead byte
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - encode a code point <= U+10ffff
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - use the fewest possible number of bytes for their code points
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             *
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * There are no irregular sequences any more.
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (ch <= MAXIMUM_UCS2)
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* fits in 16 bits */
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *(myTarget++) = (UChar) ch;
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                else
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* write out the surrogates */
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ch -= HALF_BASE;
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (myTarget < targetLimit)
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    {
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *(myTarget++) = (UChar)ch;
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    else
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    {
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* Put in overflow buffer (not handled here) */
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cnv->UCharErrorBuffer[0] = (UChar) ch;
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cnv->UCharErrorBufferLength = 1;
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *err = U_BUFFER_OVERFLOW_ERROR;
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                cnv->toULength = (int8_t)i;
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *err = U_ILLEGAL_CHAR_FOUND;
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdonefornow:
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* End of target buffer */
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *err = U_BUFFER_OVERFLOW_ERROR;
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->target = myTarget;
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->source = (const char *) mySource;
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                UErrorCode * err)
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter *cnv = args->converter;
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const unsigned char *mySource = (unsigned char *) args->source;
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar *myTarget = args->target;
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t *myOffsets = args->offsets;
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t offsetNum = 0;
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *targetLimit = args->targetLimit;
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    unsigned char *toUBytes = cnv->toUBytes;
2384dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org    UBool isCESU8 = hasCESU8Data(cnv);
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t ch, ch2 = 0;
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i, inBytes;
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* Restore size of current sequence */
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (cnv->toUnicodeStatus && myTarget < targetLimit)
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        inBytes = cnv->mode;            /* restore # of bytes to consume */
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        i = cnv->toULength;             /* restore # of bytes consumed */
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toULength = 0;
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toUnicodeStatus = 0;
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto morebytes;
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (mySource < sourceLimit && myTarget < targetLimit)
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = *(mySource++);
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (ch < 0x80)        /* Simple case */
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myTarget++) = (UChar) ch;
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myOffsets++) = offsetNum++;
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            toUBytes[0] = (char)ch;
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            inBytes = bytesFromUTF8[ch];
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            i = 1;
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgmorebytes:
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            while (i < inBytes)
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (mySource < sourceLimit)
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    toUBytes[i] = (char) (ch2 = *mySource);
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (!U8_IS_TRAIL(ch2))
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    {
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break; /* i < inBytes */
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ch = (ch << 6) + ch2;
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++mySource;
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    i++;
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                else
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cnv->toUnicodeStatus = ch;
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cnv->mode = inBytes;
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cnv->toULength = (int8_t)i;
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    goto donefornow;
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* Remove the accumulated high bits */
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ch -= offsetsFromUTF8[inBytes];
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - use only trail bytes after a lead byte (checked above)
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - use the right number of trail bytes for a given lead byte
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - encode a code point <= U+10ffff
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - use the fewest possible number of bytes for their code points
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             *
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * There are no irregular sequences any more.
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (ch <= MAXIMUM_UCS2)
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* fits in 16 bits */
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *(myTarget++) = (UChar) ch;
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *(myOffsets++) = offsetNum;
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                else
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                {
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* write out the surrogates */
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ch -= HALF_BASE;
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *(myOffsets++) = offsetNum;
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (myTarget < targetLimit)
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    {
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *(myTarget++) = (UChar)ch;
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *(myOffsets++) = offsetNum;
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    else
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    {
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cnv->UCharErrorBuffer[0] = (UChar) ch;
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cnv->UCharErrorBufferLength = 1;
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *err = U_BUFFER_OVERFLOW_ERROR;
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                offsetNum += i;
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                cnv->toULength = (int8_t)i;
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *err = U_ILLEGAL_CHAR_FOUND;
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgdonefornow:
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {   /* End of target buffer */
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *err = U_BUFFER_OVERFLOW_ERROR;
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->target = myTarget;
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->source = (const char *) mySource;
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->offsets = myOffsets;
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                    UErrorCode * err)
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter *cnv = args->converter;
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *mySource = args->source;
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *sourceLimit = args->sourceLimit;
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t *myTarget = (uint8_t *) args->target;
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t *tempPtr;
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 ch;
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t tempBuf[4];
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t indexToWrite;
3694dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org    UBool isNotCESU8 = !hasCESU8Data(cnv);
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (cnv->fromUChar32 && myTarget < targetLimit)
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = cnv->fromUChar32;
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->fromUChar32 = 0;
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto lowsurrogate;
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (mySource < sourceLimit && myTarget < targetLimit)
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = *(mySource++);
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (ch < 0x80)        /* Single byte */
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myTarget++) = (uint8_t) ch;
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (ch < 0x800)  /* Double byte */
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (myTarget < targetLimit)
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                cnv->charErrorBufferLength = 1;
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *err = U_BUFFER_OVERFLOW_ERROR;
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else {
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* Check for surrogates */
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U16_IS_SURROGATE(ch) && isNotCESU8) {
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orglowsurrogate:
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (mySource < sourceLimit) {
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* test both code units */
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* convert and consume this supplementary code point */
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ++mySource;
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* exit this condition tree */
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    else {
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* this is an unpaired trail or lead code unit */
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* callback(illegal) */
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cnv->fromUChar32 = ch;
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *err = U_ILLEGAL_CHAR_FOUND;
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                else {
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* no more input */
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cnv->fromUChar32 = ch;
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* Do we write the buffer directly for speed,
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            or do we have to be careful about target buffer space? */
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (ch <= MAXIMUM_UCS2) {
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                indexToWrite = 2;
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else {
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                indexToWrite = 3;
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (tempPtr == myTarget) {
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* There was enough space to write the codepoint directly. */
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                myTarget += (indexToWrite + 1);
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else {
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* We might run out of room soon. Write it slowly. */
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (myTarget < targetLimit) {
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *(myTarget++) = *tempPtr;
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    else {
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr;
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *err = U_BUFFER_OVERFLOW_ERROR;
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *err = U_BUFFER_OVERFLOW_ERROR;
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->target = (char *) myTarget;
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->source = mySource;
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                  UErrorCode * err)
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter *cnv = args->converter;
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *mySource = args->source;
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t *myOffsets = args->offsets;
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UChar *sourceLimit = args->sourceLimit;
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t *myTarget = (uint8_t *) args->target;
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t *tempPtr;
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 ch;
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t offsetNum, nextSourceIndex;
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t indexToWrite;
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t tempBuf[4];
4854dfa619cf375ebb67b7b9311487d19a4129f742fjshin@chromium.org    UBool isNotCESU8 = !hasCESU8Data(cnv);
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (cnv->fromUChar32 && myTarget < targetLimit)
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = cnv->fromUChar32;
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->fromUChar32 = 0;
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        offsetNum = -1;
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextSourceIndex = 0;
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto lowsurrogate;
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        offsetNum = 0;
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (mySource < sourceLimit && myTarget < targetLimit)
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch = *(mySource++);
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (ch < 0x80)        /* Single byte */
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myOffsets++) = offsetNum++;
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myTarget++) = (char) ch;
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else if (ch < 0x800)  /* Double byte */
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myOffsets++) = offsetNum;
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (myTarget < targetLimit)
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *(myOffsets++) = offsetNum++;
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                cnv->charErrorBufferLength = 1;
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *err = U_BUFFER_OVERFLOW_ERROR;
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* Check for surrogates */
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            nextSourceIndex = offsetNum + 1;
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U16_IS_SURROGATE(ch) && isNotCESU8) {
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orglowsurrogate:
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (mySource < sourceLimit) {
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* test both code units */
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* convert and consume this supplementary code point */
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ++mySource;
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ++nextSourceIndex;
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* exit this condition tree */
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    else {
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* this is an unpaired trail or lead code unit */
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* callback(illegal) */
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cnv->fromUChar32 = ch;
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *err = U_ILLEGAL_CHAR_FOUND;
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                else {
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* no more input */
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    cnv->fromUChar32 = ch;
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    break;
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* Do we write the buffer directly for speed,
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            or do we have to be careful about target buffer space? */
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (ch <= MAXIMUM_UCS2) {
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                indexToWrite = 2;
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else {
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                indexToWrite = 3;
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (tempPtr == myTarget) {
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* There was enough space to write the codepoint directly. */
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                myTarget += (indexToWrite + 1);
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                myOffsets[0] = offsetNum;
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                myOffsets[1] = offsetNum;
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                myOffsets[2] = offsetNum;
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (indexToWrite >= 3) {
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    myOffsets[3] = offsetNum;
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                myOffsets += (indexToWrite + 1);
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            else {
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* We might run out of room soon. Write it slowly. */
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (myTarget < targetLimit)
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    {
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *(myOffsets++) = offsetNum;
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *(myTarget++) = *tempPtr;
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    else
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    {
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr;
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *err = U_BUFFER_OVERFLOW_ERROR;
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            offsetNum = nextSourceIndex;
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *err = U_BUFFER_OVERFLOW_ERROR;
6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->target = (char *) myTarget;
6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->source = mySource;
6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->offsets = myOffsets;
6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                               UErrorCode *err) {
6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter *cnv;
6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint8_t *sourceInitial;
6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint8_t *source;
6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t extraBytesToWrite;
6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t myByte;
6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 ch;
6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int8_t i, isLegalSequence;
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    cnv = args->converter;
6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sourceInitial = source = (const uint8_t *)args->source;
6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (source >= (const uint8_t *)args->sourceLimit)
6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* no input */
6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *err = U_INDEX_OUTOFBOUNDS_ERROR;
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0xffff;
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    myByte = (uint8_t)*(source++);
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (myByte < 0x80)
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        args->source = (const char *)source;
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return (UChar32)myByte;
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte];
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (extraBytesToWrite == 0) {
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toUBytes[0] = myByte;
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toULength = 1;
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *err = U_ILLEGAL_CHAR_FOUND;
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        args->source = (const char *)source;
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0xffff;
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*The byte sequence is longer than the buffer area passed*/
6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit)
6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* check if all of the remaining bytes are trail bytes */
6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toUBytes[0] = myByte;
6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        i = 1;
6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *err = U_TRUNCATED_CHAR_FOUND;
6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while(source < (const uint8_t *)args->sourceLimit) {
6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U8_IS_TRAIL(myByte = *source)) {
6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                cnv->toUBytes[i++] = myByte;
6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++source;
6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* error even before we run out of input */
6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *err = U_ILLEGAL_CHAR_FOUND;
6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toULength = i;
6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        args->source = (const char *)source;
6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return 0xffff;
6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isLegalSequence = 1;
6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ch = myByte << 6;
6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch(extraBytesToWrite)
6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    {
6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      /* note: code falls through cases! (sic)*/
6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 6:
6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch += (myByte = *source);
6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch <<= 6;
6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!U8_IS_TRAIL(myByte))
6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isLegalSequence = 0;
6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++source;
6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 5: /*fall through*/
6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch += (myByte = *source);
6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch <<= 6;
6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!U8_IS_TRAIL(myByte))
6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isLegalSequence = 0;
6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++source;
6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 4: /*fall through*/
6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch += (myByte = *source);
6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch <<= 6;
6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!U8_IS_TRAIL(myByte))
6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isLegalSequence = 0;
6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++source;
7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 3: /*fall through*/
7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch += (myByte = *source);
7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch <<= 6;
7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!U8_IS_TRAIL(myByte))
7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isLegalSequence = 0;
7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++source;
7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    case 2: /*fall through*/
7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ch += (myByte = *source);
7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (!U8_IS_TRAIL(myByte))
7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            isLegalSequence = 0;
7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++source;
7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    };
7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ch -= offsetsFromUTF8[extraBytesToWrite];
7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    args->source = (const char *)source;
7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /*
7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * - use only trail bytes after a lead byte (checked above)
7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * - use the right number of trail bytes for a given lead byte
7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * - encode a code point <= U+10ffff
7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * - use the fewest possible number of bytes for their code points
7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * There are no irregular sequences any more.
7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (isLegalSequence &&
7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (uint32_t)ch <= MAXIMUM_UTF &&
7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (uint32_t)ch >= utf8_minChar32[extraBytesToWrite] &&
7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        !U_IS_SURROGATE(ch)
7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ) {
7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return ch; /* return the code point */
7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i = 0; sourceInitial < source; ++i) {
7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        cnv->toUBytes[i] = *sourceInitial++;
7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    cnv->toULength = i;
7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    *err = U_ILLEGAL_CHAR_FOUND;
7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return 0xffff;
7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */
7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar32
7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgutf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar32
7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgutf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void
7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  UConverterToUnicodeArgs *pToUArgs,
7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  UErrorCode *pErrorCode) {
7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UConverter *utf8;
7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const uint8_t *source, *sourceLimit;
7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t *target;
7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t targetCapacity;
7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t count;
7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int8_t oldToULength, toULength, toULimit;
7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 c;
7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint8_t b, t1, t2;
7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* set up the local pointers */
7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utf8=pToUArgs->converter;
7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    source=(uint8_t *)pToUArgs->source;
7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    target=(uint8_t *)pFromUArgs->target;
7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* get the converter state from the UTF-8 UConverter */
7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    c=(UChar32)utf8->toUnicodeStatus;
7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(c!=0) {
7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        toULength=oldToULength=utf8->toULength;
7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        toULimit=(int8_t)utf8->mode;
7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        toULength=oldToULength=toULimit=0;
7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    count=(int32_t)(sourceLimit-source)+oldToULength;
7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(count<toULimit) {
7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Not enough input to complete the partial character.
7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Jump to moreBytes below - it will not output to target.
7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else if(targetCapacity<toULimit) {
7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Not enough target capacity to output the partial character.
8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Let the standard converter handle this.
8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *pErrorCode=U_USING_DEFAULT_WARNING;
8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Use a single counter for source and target, counting the minimum of
8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * the source length and the target capacity.
8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * As a result, the source length is checked only once per multi-byte
8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * character instead of twice.
8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         *
8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Make sure that the last byte sequence is complete, or else
8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * stop just before it.
8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * (The longest legal byte sequence has 3 trail bytes.)
8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Count oldToULength (number of source bytes from a previous buffer)
8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * into the source length but reduce the source index by toULimit
8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * while going back over trail bytes in order to not go back into
8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * the bytes that will be read for finishing a partial
8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * sequence from the previous buffer.
8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * Let the standard converter handle edge cases.
8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t i;
8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(count>targetCapacity) {
8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            count=targetCapacity;
8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        i=0;
8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while(i<3 && i<(count-toULimit)) {
8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            b=source[count-oldToULength-i-1];
8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(U8_IS_TRAIL(b)) {
8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ++i;
8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(i<U8_COUNT_TRAIL_BYTES(b)) {
8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* stop converting before the lead byte if there are not enough trail bytes for it */
8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    count-=i+1;
8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                break;
8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(c!=0) {
8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utf8->toUnicodeStatus=0;
8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        utf8->toULength=0;
8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        goto moreBytes;
8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* See note in ucnv_SBCSFromUTF8() about this goto. */
8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* conversion loop */
8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(count>0) {
8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        b=*source++;
8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if((int8_t)b>=0) {
8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* convert ASCII */
8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *target++=b;
8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            --count;
8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            continue;
8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(b>0xe0) {
8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if( /* handle U+1000..U+D7FF inline */
8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    (t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) ||
8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                               (b==0xed && (t1 <= 0x9f))) &&
8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    (t2=source[1]) >= 0x80 && t2 <= 0xbf
8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ) {
8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    source+=2;
8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=b;
8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=t1;
8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=t2;
8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    count-=3;
8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    continue;
8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(b<0xe0) {
8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if( /* handle U+0080..U+07FF inline */
8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    b>=0xc2 &&
8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    (t1=*source) >= 0x80 && t1 <= 0xbf
8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ) {
8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    ++source;
8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=b;
8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=t1;
8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    count-=2;
8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    continue;
8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(b==0xe0) {
8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if( /* handle U+0800..U+0FFF inline */
8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    (t1=source[0]) >= 0xa0 && t1 <= 0xbf &&
8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    (t2=source[1]) >= 0x80 && t2 <= 0xbf
8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                ) {
8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    source+=2;
8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=b;
8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=t1;
8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=t2;
8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    count-=3;
8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    continue;
8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* handle "complicated" and error cases, and continuing partial characters */
8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            oldToULength=0;
8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            toULength=1;
8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c=b;
9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgmoreBytes:
9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            while(toULength<toULimit) {
9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(source<sourceLimit) {
9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    b=*source;
9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(U8_IS_TRAIL(b)) {
9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ++source;
9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        ++toULength;
9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        c=(c<<6)+b;
9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else {
9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break; /* sequence too short, stop with toULength<toULimit */
9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                } else {
9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    source-=(toULength-oldToULength);
9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    while(oldToULength<toULength) {
9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utf8->toUBytes[oldToULength++]=*source++;
9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utf8->toUnicodeStatus=c;
9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utf8->toULength=toULength;
9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utf8->mode=toULimit;
9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    pToUArgs->source=(char *)source;
9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    pFromUArgs->target=(char *)target;
9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return;
9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if( toULength==toULimit &&      /* consumed all trail bytes */
9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (toULength==3 || toULength==2) &&             /* BMP */
9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (c<=0xd7ff || 0xe000<=c)    /* not a surrogate */
9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* legal byte sequence for BMP code point */
9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else if(
9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                toULength==toULimit && toULength==4 &&
9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ) {
9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* legal byte sequence for supplementary code point */
9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* error handling: illegal UTF-8 byte sequence */
9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                source-=(toULength-oldToULength);
9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                while(oldToULength<toULength) {
9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utf8->toUBytes[oldToULength++]=*source++;
9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                utf8->toULength=toULength;
9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                pToUArgs->source=(char *)source;
9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                pFromUArgs->target=(char *)target;
9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return;
9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* copy the legal byte sequence to the target */
9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            {
9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                int8_t i;
9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(i=0; i<oldToULength; ++i) {
9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=utf8->toUBytes[i];
9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                source-=(toULength-oldToULength);
9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(; i<toULength; ++i) {
9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    *target++=*source++;
9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                count-=toULength;
9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_SUCCESS(*pErrorCode) && source<sourceLimit) {
9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(target==(const uint8_t *)pFromUArgs->targetLimit) {
9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            b=*source;
9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(toULimit>(sourceLimit-source)) {
9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* collect a truncated byte sequence */
9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                toULength=0;
9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                c=b;
9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                for(;;) {
9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    utf8->toUBytes[toULength++]=b;
9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(++source==sourceLimit) {
9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* partial byte sequence at end of source */
9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utf8->toUnicodeStatus=c;
9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utf8->toULength=toULength;
9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utf8->mode=toULimit;
9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    } else if(!U8_IS_TRAIL(b=*source)) {
9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* lead byte in trail byte position */
9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        utf8->toULength=toULength;
9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        break;
9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    c=(c<<6)+b;
9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* partial-sequence target overflow: fall back to the pivoting implementation */
9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                *pErrorCode=U_USING_DEFAULT_WARNING;
9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* write back the updated pointers */
10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pToUArgs->source=(char *)source;
10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pFromUArgs->target=(char *)target;
10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UTF-8 converter data ----------------------------------------------------- */
10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Function table for the UTF-8 converter.
 *
 * The initializer is positional: each entry fills the next member of
 * UConverterImpl, which is declared outside this section, so the order of
 * the entries (and their blank-line grouping) must stay in step with that
 * declaration. NULL entries install no function in their slot.
 * NOTE(review): what a NULL slot means to the caller is defined by the
 * converter framework, not here -- confirm against the UConverterImpl
 * declaration before filling one in or reordering.
 */
10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _UTF8Impl={
10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UCNV_UTF8,
10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/* conversion entry points: each direction is followed by its _OFFSETS_LOGIC variant */
10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_toUnicode_UTF8,
10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_fromUnicode_UTF8,
10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_getNextUChar_UTF8,
10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_getNonSurrogateUnicodeSet,
10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/* the same function is installed in both of the last two slots */
/* NOTE(review): presumably the UTF-8-to-UTF-8 fast path for both directions -- confirm */
10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_UTF8FromUTF8,
10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_UTF8FromUTF8
10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* CCSID 1208 refers to UTF-8 for any version of Unicode */
/*
 * Static description of the UTF-8 converter: name "UTF-8", CCSID 1208,
 * and the byte-length limits noted inline below.
 *
 * The initializer is positional against UConverterStaticData (declared
 * outside this section); do not reorder entries. The first entry records
 * the structure's own size.
 */
10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterStaticData _UTF8StaticData={
10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sizeof(UConverterStaticData),
10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "UTF-8",
10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    1208, UCNV_IBM, UCNV_UTF8,
10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
/* EF BF BD is the UTF-8 encoding of U+FFFD; the following 3 matches its byte count */
/* NOTE(review): presumably the substitution sequence and its length, then two fallback flags -- confirm */
10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0,
10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0,
10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Shared-data record for the UTF-8 converter. It ties together the static
 * description (_UTF8StaticData) and the function table (_UTF8Impl).
 *
 * Unlike those two objects this one is not declared static, so it has
 * external linkage and can be referenced from other translation units.
 * The initializer is positional against UConverterSharedData (declared
 * outside this section).
 */
10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UConverterSharedData _UTF8Data={
/* second entry is an all-ones uint32_t */
/* NOTE(review): presumably a "never released" reference-count sentinel -- confirm */
10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sizeof(UConverterSharedData), ~((uint32_t) 0),
10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL, NULL, &_UTF8StaticData, FALSE, &_UTF8Impl,
10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0
10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* CESU-8 converter data ---------------------------------------------------- */
10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Function table for the CESU-8 converter.
 *
 * CESU-8 reuses the UTF-8 conversion functions (the file header notes that
 * they branch internally for supplementary code points). Compared with
 * _UTF8Impl, this table:
 *   - starts with UCNV_CESU8 instead of UCNV_UTF8;
 *   - has NULL where _UTF8Impl installs ucnv_getNextUChar_UTF8;
 *   - uses ucnv_getCompleteUnicodeSet instead of
 *     ucnv_getNonSurrogateUnicodeSet;
 *   - ends after that entry, so any remaining UConverterImpl members are
 *     zero-initialized by C's aggregate-initialization rules.
 *
 * The initializer is positional against UConverterImpl (declared outside
 * this section); do not reorder entries.
 */
10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _CESU8Impl={
10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UCNV_CESU8,
10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/* same four conversion functions as the UTF-8 table; the fifth slot is left NULL */
10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_toUnicode_UTF8,
10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_fromUnicode_UTF8,
10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL,
10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ucnv_getCompleteUnicodeSet
10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Static description of the CESU-8 converter: name "CESU-8", CCSID 9400.
 *
 * The initializer is positional against UConverterStaticData (declared
 * outside this section); do not reorder entries. The first entry records
 * the structure's own size.
 */
10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterStaticData _CESU8StaticData={
10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sizeof(UConverterStaticData),
10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "CESU-8",
10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    9400, /* CCSID for CESU-8 */
/* NOTE(review): the trailing "1, 3" presumably are the min/max bytes per UChar, as in _UTF8StaticData -- confirm */
10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UCNV_UNKNOWN, UCNV_CESU8, 1, 3,
/* EF BF BD is the UTF-8 encoding of U+FFFD; the following 3 matches its byte count */
10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0,
10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0,
10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/*
 * Shared-data record for the CESU-8 converter. It ties together the static
 * description (_CESU8StaticData) and the function table (_CESU8Impl).
 *
 * Not declared static, so it has external linkage and can be referenced
 * from other translation units. The initializer is positional against
 * UConverterSharedData (declared outside this section).
 */
10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UConverterSharedData _CESU8Data={
/* second entry is an all-ones uint32_t */
/* NOTE(review): presumably a "never released" reference-count sentinel -- confirm */
10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sizeof(UConverterSharedData), ~((uint32_t) 0),
10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl,
10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0
10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
1096