/*
**********************************************************************
*   Copyright (C) 2002-2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnv_u8.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002jul01
*   created by: Markus W. Scherer
*
*   UTF-8 converter implementation. Used to be in ucnv_utf.c.
*
*   Also, CESU-8 implementation, see UTR 26.
*   The CESU-8 converter uses all the same functions as the
*   UTF-8 converter, with a branch for converting supplementary code points.
*/
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/ucnv.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "cmemory.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Prototypes --------------------------------------------------------------- */
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Keep these here to make finicky compilers happy */
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args,
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                           UErrorCode *err);
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args,
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                        UErrorCode *err);
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* UTF-8 -------------------------------------------------------------------- */

/* UTF-8 Conversion DATA
 *   for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9
 */
/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/
#define MAXIMUM_UCS2            0x0000FFFF  /* largest value written as a single 16-bit unit */
#define MAXIMUM_UTF             0x0010FFFF  /* largest code point accepted by the decoders */
#define MAXIMUM_UCS4            0x7FFFFFFF
#define HALF_SHIFT              10          /* bits carried by each half of a surrogate pair */
#define HALF_BASE               0x0010000   /* subtracted before splitting into surrogates */
#define HALF_MASK               0x3FF       /* (1 << HALF_SHIFT) - 1 */
#define SURROGATE_HIGH_START    0xD800
#define SURROGATE_HIGH_END      0xDBFF
#define SURROGATE_LOW_START     0xDC00
#define SURROGATE_LOW_END       0xDFFF

/* -SURROGATE_LOW_START + HALF_BASE, i.e. 0x10000 - 0xDC00 = 0x2400 */
#define SURROGATE_LOW_BASE      9216
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Indexed by the total length (1..6) of a UTF-8 byte sequence; index 0 is
 * unused padding. The decoders accumulate a sequence as
 * ch = (ch << 6) + nextByte starting from the raw lead byte, so the marker
 * bits of the lead byte and of every trail byte (0x80 each) end up added
 * into ch. Subtracting offsetsFromUTF8[length] removes those accumulated
 * marker bits and leaves the code point value.
 */
static const uint32_t offsetsFromUTF8[7] = {0,
  (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
  (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080
};

/* END OF UTF-8 Conversion DATA */
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Indexed by the first byte of a sequence; gives the total number of bytes
 * the sequence is expected to have. 0 marks bytes that cannot start a
 * sequence here (0x80..0xBF, 0xFE, 0xFF). Lengths 5 and 6 are still listed
 * for 0xF8..0xFD; the decoders reject those later via utf8_minChar32.
 * Each row below covers 32 byte values.
 */
static const int8_t bytesFromUTF8[256] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Starting with Unicode 3.0.1:
 * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N];
 * byte sequences with more than 4 bytes are illegal in UTF-8,
 * which is tested with impossible values for them
 * (0xffffffff can never be <= the maximum code point the decoders accept).
 * Indexed by sequence length 0..6; entries 0 and 1 impose no lower bound.
 */
static const uint32_t
utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                  UErrorCode * err)
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv = args->converter;
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const unsigned char *mySource = (unsigned char *) args->source;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *myTarget = args->target;
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *targetLimit = args->targetLimit;
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    unsigned char *toUBytes = cnv->toUBytes;
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t ch, ch2 = 0;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, inBytes;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Restore size of current sequence */
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (cnv->toUnicodeStatus && myTarget < targetLimit)
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        inBytes = cnv->mode;            /* restore # of bytes to consume */
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i = cnv->toULength;             /* restore # of bytes consumed */
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength = 0;
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUnicodeStatus = 0;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto morebytes;
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (mySource < sourceLimit && myTarget < targetLimit)
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = *(mySource++);
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (ch < 0x80)        /* Simple case */
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myTarget++) = (UChar) ch;
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* store the first char */
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            toUBytes[0] = (char)ch;
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            i = 1;
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumorebytes:
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while (i < inBytes)
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (mySource < sourceLimit)
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                {
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    toUBytes[i] = (char) (ch2 = *mySource);
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (!UTF8_IS_TRAIL(ch2))
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    {
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break; /* i < inBytes */
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ch = (ch << 6) + ch2;
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ++mySource;
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    i++;
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                else
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                {
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* stores a partially calculated target*/
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUnicodeStatus = ch;
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->mode = inBytes;
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength = (int8_t) i;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto donefornow;
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Remove the accumulated high bits */
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ch -= offsetsFromUTF8[inBytes];
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - use only trail bytes after a lead byte (checked above)
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - use the right number of trail bytes for a given lead byte
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - encode a code point <= U+10ffff
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - use the fewest possible number of bytes for their code points
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             *
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * There are no irregular sequences any more.
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (ch <= MAXIMUM_UCS2)
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                {
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* fits in 16 bits */
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(myTarget++) = (UChar) ch;
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                else
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                {
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* write out the surrogates */
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ch -= HALF_BASE;
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (myTarget < targetLimit)
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    {
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *(myTarget++) = (UChar)ch;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    else
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    {
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* Put in overflow buffer (not handled here) */
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cnv->UCharErrorBuffer[0] = (UChar) ch;
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cnv->UCharErrorBufferLength = 1;
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *err = U_BUFFER_OVERFLOW_ERROR;
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength = (int8_t)i;
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *err = U_ILLEGAL_CHAR_FOUND;
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querudonefornow:
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* End of target buffer */
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *err = U_BUFFER_OVERFLOW_ERROR;
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->target = myTarget;
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->source = (const char *) mySource;
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                UErrorCode * err)
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv = args->converter;
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const unsigned char *mySource = (unsigned char *) args->source;
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar *myTarget = args->target;
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *myOffsets = args->offsets;
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t offsetNum = 0;
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *targetLimit = args->targetLimit;
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    unsigned char *toUBytes = cnv->toUBytes;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t ch, ch2 = 0;
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, inBytes;
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Restore size of current sequence */
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (cnv->toUnicodeStatus && myTarget < targetLimit)
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        inBytes = cnv->mode;            /* restore # of bytes to consume */
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i = cnv->toULength;             /* restore # of bytes consumed */
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength = 0;
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUnicodeStatus = 0;
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto morebytes;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (mySource < sourceLimit && myTarget < targetLimit)
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = *(mySource++);
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (ch < 0x80)        /* Simple case */
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myTarget++) = (UChar) ch;
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myOffsets++) = offsetNum++;
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            toUBytes[0] = (char)ch;
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            inBytes = bytesFromUTF8[ch];
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            i = 1;
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querumorebytes:
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while (i < inBytes)
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (mySource < sourceLimit)
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                {
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    toUBytes[i] = (char) (ch2 = *mySource);
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (!UTF8_IS_TRAIL(ch2))
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    {
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break; /* i < inBytes */
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ch = (ch << 6) + ch2;
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ++mySource;
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    i++;
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                else
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                {
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toUnicodeStatus = ch;
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->mode = inBytes;
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->toULength = (int8_t)i;
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    goto donefornow;
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Remove the accumulated high bits */
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ch -= offsetsFromUTF8[inBytes];
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - use only trail bytes after a lead byte (checked above)
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - use the right number of trail bytes for a given lead byte
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - encode a code point <= U+10ffff
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - use the fewest possible number of bytes for their code points
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             *
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * There are no irregular sequences any more.
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (ch <= MAXIMUM_UCS2)
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                {
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* fits in 16 bits */
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(myTarget++) = (UChar) ch;
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(myOffsets++) = offsetNum;
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                else
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                {
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* write out the surrogates */
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ch -= HALF_BASE;
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *(myOffsets++) = offsetNum;
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (myTarget < targetLimit)
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    {
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *(myTarget++) = (UChar)ch;
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *(myOffsets++) = offsetNum;
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    else
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    {
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cnv->UCharErrorBuffer[0] = (UChar) ch;
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cnv->UCharErrorBufferLength = 1;
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *err = U_BUFFER_OVERFLOW_ERROR;
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                offsetNum += i;
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toULength = (int8_t)i;
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *err = U_ILLEGAL_CHAR_FOUND;
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querudonefornow:
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {   /* End of target buffer */
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *err = U_BUFFER_OVERFLOW_ERROR;
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->target = myTarget;
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->source = (const char *) mySource;
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->offsets = myOffsets;
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                    UErrorCode * err)
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv = args->converter;
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *mySource = args->source;
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *sourceLimit = args->sourceLimit;
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *myTarget = (uint8_t *) args->target;
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *tempPtr;
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 ch;
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t tempBuf[4];
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t indexToWrite;
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (cnv->fromUChar32 && myTarget < targetLimit)
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = cnv->fromUChar32;
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->fromUChar32 = 0;
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto lowsurrogate;
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (mySource < sourceLimit && myTarget < targetLimit)
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = *(mySource++);
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (ch < 0x80)        /* Single byte */
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myTarget++) = (uint8_t) ch;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else if (ch < 0x800)  /* Double byte */
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (myTarget < targetLimit)
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->charErrorBufferLength = 1;
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *err = U_BUFFER_OVERFLOW_ERROR;
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else {
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Check for surrogates */
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(UTF_IS_SURROGATE(ch) && isNotCESU8) {
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querulowsurrogate:
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (mySource < sourceLimit) {
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* test both code units */
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) {
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* convert and consume this supplementary code point */
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ch=UTF16_GET_PAIR_VALUE(ch, *mySource);
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ++mySource;
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* exit this condition tree */
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    else {
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* this is an unpaired trail or lead code unit */
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* callback(illegal) */
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cnv->fromUChar32 = ch;
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *err = U_ILLEGAL_CHAR_FOUND;
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                else {
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* no more input */
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->fromUChar32 = ch;
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Do we write the buffer directly for speed,
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            or do we have to be careful about target buffer space? */
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (ch <= MAXIMUM_UCS2) {
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                indexToWrite = 2;
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else {
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                indexToWrite = 3;
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (tempPtr == myTarget) {
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* There was enough space to write the codepoint directly. */
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                myTarget += (indexToWrite + 1);
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else {
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* We might run out of room soon. Write it slowly. */
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (myTarget < targetLimit) {
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *(myTarget++) = *tempPtr;
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    else {
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr;
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *err = U_BUFFER_OVERFLOW_ERROR;
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *err = U_BUFFER_OVERFLOW_ERROR;
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->target = (char *) myTarget;
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->source = mySource;
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                  UErrorCode * err)
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv = args->converter;
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *mySource = args->source;
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t *myOffsets = args->offsets;
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *sourceLimit = args->sourceLimit;
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *myTarget = (uint8_t *) args->target;
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *tempPtr;
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 ch;
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t offsetNum, nextSourceIndex;
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t indexToWrite;
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t tempBuf[4];
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (cnv->fromUChar32 && myTarget < targetLimit)
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = cnv->fromUChar32;
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->fromUChar32 = 0;
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        offsetNum = -1;
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        nextSourceIndex = 0;
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto lowsurrogate;
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        offsetNum = 0;
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (mySource < sourceLimit && myTarget < targetLimit)
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = *(mySource++);
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (ch < 0x80)        /* Single byte */
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myOffsets++) = offsetNum++;
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myTarget++) = (char) ch;
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else if (ch < 0x800)  /* Double byte */
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myOffsets++) = offsetNum;
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (myTarget < targetLimit)
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *(myOffsets++) = offsetNum++;
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->charErrorBufferLength = 1;
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *err = U_BUFFER_OVERFLOW_ERROR;
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Check for surrogates */
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            nextSourceIndex = offsetNum + 1;
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(UTF_IS_SURROGATE(ch) && isNotCESU8) {
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querulowsurrogate:
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (mySource < sourceLimit) {
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* test both code units */
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) {
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* convert and consume this supplementary code point */
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ch=UTF16_GET_PAIR_VALUE(ch, *mySource);
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ++mySource;
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ++nextSourceIndex;
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* exit this condition tree */
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    else {
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* this is an unpaired trail or lead code unit */
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* callback(illegal) */
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cnv->fromUChar32 = ch;
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *err = U_ILLEGAL_CHAR_FOUND;
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                else {
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* no more input */
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    cnv->fromUChar32 = ch;
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Do we write the buffer directly for speed,
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            or do we have to be careful about target buffer space? */
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (ch <= MAXIMUM_UCS2) {
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                indexToWrite = 2;
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else {
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                indexToWrite = 3;
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (tempPtr == myTarget) {
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* There was enough space to write the codepoint directly. */
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                myTarget += (indexToWrite + 1);
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                myOffsets[0] = offsetNum;
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                myOffsets[1] = offsetNum;
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                myOffsets[2] = offsetNum;
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (indexToWrite >= 3) {
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    myOffsets[3] = offsetNum;
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                myOffsets += (indexToWrite + 1);
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            else {
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* We might run out of room soon. Write it slowly. */
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (myTarget < targetLimit)
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    {
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *(myOffsets++) = offsetNum;
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *(myTarget++) = *tempPtr;
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    else
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    {
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr;
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *err = U_BUFFER_OVERFLOW_ERROR;
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            offsetNum = nextSourceIndex;
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *err = U_BUFFER_OVERFLOW_ERROR;
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->target = (char *) myTarget;
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->source = mySource;
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->offsets = myOffsets;
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                               UErrorCode *err) {
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *cnv;
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *sourceInitial;
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source;
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint16_t extraBytesToWrite;
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t myByte;
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 ch;
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t i, isLegalSequence;
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv = args->converter;
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceInitial = source = (const uint8_t *)args->source;
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (source >= (const uint8_t *)args->sourceLimit)
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* no input */
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *err = U_INDEX_OUTOFBOUNDS_ERROR;
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0xffff;
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    myByte = (uint8_t)*(source++);
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (myByte < 0x80)
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        args->source = (const char *)source;
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return (UChar32)myByte;
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte];
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (extraBytesToWrite == 0) {
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUBytes[0] = myByte;
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength = 1;
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *err = U_ILLEGAL_CHAR_FOUND;
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        args->source = (const char *)source;
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0xffff;
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*The byte sequence is longer than the buffer area passed*/
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit)
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* check if all of the remaining bytes are trail bytes */
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUBytes[0] = myByte;
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i = 1;
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *err = U_TRUNCATED_CHAR_FOUND;
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(source < (const uint8_t *)args->sourceLimit) {
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(U8_IS_TRAIL(myByte = *source)) {
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                cnv->toUBytes[i++] = myByte;
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++source;
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* error even before we run out of input */
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *err = U_ILLEGAL_CHAR_FOUND;
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength = i;
653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        args->source = (const char *)source;
654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0xffff;
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isLegalSequence = 1;
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ch = myByte << 6;
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(extraBytesToWrite)
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      /* note: code falls through cases! (sic)*/
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 6:
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch += (myByte = *source);
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch <<= 6;
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (!UTF8_IS_TRAIL(myByte))
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            isLegalSequence = 0;
668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 5:
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch += (myByte = *source);
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch <<= 6;
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (!UTF8_IS_TRAIL(myByte))
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            isLegalSequence = 0;
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 4:
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch += (myByte = *source);
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch <<= 6;
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (!UTF8_IS_TRAIL(myByte))
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            isLegalSequence = 0;
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 3:
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch += (myByte = *source);
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch <<= 6;
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (!UTF8_IS_TRAIL(myByte))
693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            isLegalSequence = 0;
695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 2:
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch += (myByte = *source);
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (!UTF8_IS_TRAIL(myByte))
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            isLegalSequence = 0;
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++source;
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ch -= offsetsFromUTF8[extraBytesToWrite];
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    args->source = (const char *)source;
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - use only trail bytes after a lead byte (checked above)
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - use the right number of trail bytes for a given lead byte
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - encode a code point <= U+10ffff
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - use the fewest possible number of bytes for their code points
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * There are no irregular sequences any more.
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (isLegalSequence &&
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        (uint32_t)ch <= MAXIMUM_UTF &&
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        (uint32_t)ch >= utf8_minChar32[extraBytesToWrite] &&
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        !U_IS_SURROGATE(ch)
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ) {
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return ch; /* return the code point */
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i = 0; sourceInitial < source; ++i) {
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toUBytes[i] = *sourceInitial++;
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv->toULength = i;
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *err = U_ILLEGAL_CHAR_FOUND;
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0xffff;
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar32
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar32
745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruutf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  UConverterToUnicodeArgs *pToUArgs,
751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  UErrorCode *pErrorCode) {
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter *utf8, *cnv;
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *source, *sourceLimit;
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *target;
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t targetCapacity;
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t count;
757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int8_t oldToULength, toULength, toULimit;
759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t b, t1, t2;
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set up the local pointers */
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utf8=pToUArgs->converter;
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cnv=pFromUArgs->converter;
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    source=(uint8_t *)pToUArgs->source;
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=(uint8_t *)pFromUArgs->target;
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* get the converter state from the UTF-8 UConverter */
772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=(UChar32)utf8->toUnicodeStatus;
773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c!=0) {
774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        toULength=oldToULength=utf8->toULength;
775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        toULimit=(int8_t)utf8->mode;
776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        toULength=oldToULength=toULimit=0;
778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    count=(int32_t)(sourceLimit-source)+oldToULength;
781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(count<toULimit) {
782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Not enough input to complete the partial character.
784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Jump to moreBytes below - it will not output to target.
785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(targetCapacity<toULimit) {
787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Not enough target capacity to output the partial character.
789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Let the standard converter handle this.
790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_USING_DEFAULT_WARNING;
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Use a single counter for source and target, counting the minimum of
796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * the source length and the target capacity.
797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * As a result, the source length is checked only once per multi-byte
798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * character instead of twice.
799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Make sure that the last byte sequence is complete, or else
801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * stop just before it.
802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * (The longest legal byte sequence has 3 trail bytes.)
803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Count oldToULength (number of source bytes from a previous buffer)
804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * into the source length but reduce the source index by toULimit
805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * while going back over trail bytes in order to not go back into
806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * the bytes that will be read for finishing a partial
807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * sequence from the previous buffer.
808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Let the standard converter handle edge cases.
809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t i;
811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(count>targetCapacity) {
813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            count=targetCapacity;
814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=0;
817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while(i<3 && i<(count-toULimit)) {
818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=source[count-oldToULength-i-1];
819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(U8_IS_TRAIL(b)) {
820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++i;
821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(i<utf8_countTrailBytes[b]) {
823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* stop converting before the lead byte if there are not enough trail bytes for it */
824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    count-=i+1;
825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c!=0) {
832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        utf8->toUnicodeStatus=0;
833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        utf8->toULength=0;
834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        goto moreBytes;
835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* See note in ucnv_SBCSFromUTF8() about this goto. */
836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* conversion loop */
839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(count>0) {
840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        b=*source++;
841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((int8_t)b>=0) {
842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* convert ASCII */
843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *target++=b;
844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --count;
845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(b>0xe0) {
848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if( /* handle U+1000..U+D7FF inline */
849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    (t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) ||
850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                               (b==0xed && (t1 <= 0x9f))) &&
851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    (t2=source[1]) >= 0x80 && t2 <= 0xbf
852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    source+=2;
854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=b;
855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=t1;
856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=t2;
857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    count-=3;
858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(b<0xe0) {
861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if( /* handle U+0080..U+07FF inline */
862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    b>=0xc2 &&
863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    (t1=*source) >= 0x80 && t1 <= 0xbf
864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ++source;
866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=b;
867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=t1;
868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    count-=2;
869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(b==0xe0) {
872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if( /* handle U+0800..U+0FFF inline */
873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    (t1=source[0]) >= 0xa0 && t1 <= 0xbf &&
874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    (t2=source[1]) >= 0x80 && t2 <= 0xbf
875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    source+=2;
877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=b;
878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=t1;
879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=t2;
880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    count-=3;
881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    continue;
882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* handle "complicated" and error cases, and continuing partial characters */
886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            oldToULength=0;
887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            toULength=1;
888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            toULimit=utf8_countTrailBytes[b]+1;
889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c=b;
890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerumoreBytes:
891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while(toULength<toULimit) {
892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(source<sourceLimit) {
893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    b=*source;
894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(U8_IS_TRAIL(b)) {
895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ++source;
896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ++toULength;
897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        c=(c<<6)+b;
898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break; /* sequence too short, stop with toULength<toULimit */
900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    source-=(toULength-oldToULength);
904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    while(oldToULength<toULength) {
905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        utf8->toUBytes[oldToULength++]=*source++;
906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    utf8->toUnicodeStatus=c;
908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    utf8->toULength=toULength;
909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    utf8->mode=toULimit;
910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    pToUArgs->source=(char *)source;
911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    pFromUArgs->target=(char *)target;
912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return;
913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if( toULength==toULimit &&      /* consumed all trail bytes */
917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (toULength==3 || toULength==2) &&             /* BMP */
918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (c<=0xd7ff || 0xe000<=c)    /* not a surrogate */
920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ) {
921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* legal byte sequence for BMP code point */
922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(
923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                toULength==toULimit && toULength==4 &&
924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ) {
926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* legal byte sequence for supplementary code point */
927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* error handling: illegal UTF-8 byte sequence */
929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                source-=(toULength-oldToULength);
930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                while(oldToULength<toULength) {
931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    utf8->toUBytes[oldToULength++]=*source++;
932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                utf8->toULength=toULength;
934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pToUArgs->source=(char *)source;
935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pFromUArgs->target=(char *)target;
936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* copy the legal byte sequence to the target */
941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                int8_t i;
943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                for(i=0; i<oldToULength; ++i) {
945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=utf8->toUBytes[i];
946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                source-=(toULength-oldToULength);
948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                for(; i<toULength; ++i) {
949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    *target++=*source++;
950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                count-=toULength;
952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(*pErrorCode) && source<sourceLimit) {
957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(target==(const uint8_t *)pFromUArgs->targetLimit) {
958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=*source;
961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            toULimit=utf8_countTrailBytes[b]+1;
962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(toULimit>(sourceLimit-source)) {
963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* collect a truncated byte sequence */
964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                toULength=0;
965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=b;
966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                for(;;) {
967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    utf8->toUBytes[toULength++]=b;
968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(++source==sourceLimit) {
969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* partial byte sequence at end of source */
970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        utf8->toUnicodeStatus=c;
971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        utf8->toULength=toULength;
972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        utf8->mode=toULimit;
973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else if(!U8_IS_TRAIL(b=*source)) {
975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* lead byte in trail byte position */
976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        utf8->toULength=toULength;
977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        break;
979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c=(c<<6)+b;
981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* partial-sequence target overflow: fall back to the pivoting implementation */
984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *pErrorCode=U_USING_DEFAULT_WARNING;
985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write back the updated pointers */
990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pToUArgs->source=(char *)source;
991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pFromUArgs->target=(char *)target;
992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UTF-8 converter data ----------------------------------------------------- */
995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Function table for the UTF-8 converter.
 *
 * The initializer is positional: each entry lands in the UConverterImpl member
 * at the same position, so the order here must follow the member order of
 * UConverterImpl, which is declared outside this part of the file.
 * NOTE(review): the remarks below describe the entries themselves; confirm the
 * meaning of each slot against the UConverterImpl declaration.
 */
996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _UTF8Impl={
997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCNV_UTF8, /* same constant that _UTF8StaticData lists for this converter */
998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, /* next two slots: no function supplied */
1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, /* next three slots: no function supplied */
1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_toUnicode_UTF8, /* five conversion entry points, named for direction and offsets handling */
1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_fromUnicode_UTF8,
1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_getNextUChar_UTF8, /* this position is NULL in _CESU8Impl */
1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, /* next four slots: no function supplied */
1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_getNonSurrogateUnicodeSet, /* _CESU8Impl uses ucnv_getCompleteUnicodeSet at this position */
1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_UTF8FromUTF8, /* the last two slots deliberately name the same function */
1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_UTF8FromUTF8
1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Static descriptive data for the UTF-8 converter (name, CCSID, byte-length
 * limits, substitution bytes). The initializer is positional, so the order
 * must follow the member order of UConverterStaticData, declared elsewhere.
 */
1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* CCSID 1208 denotes UTF-8 for any version of Unicode */
1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _UTF8StaticData={
1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterStaticData), /* size of this structure */
1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "UTF-8", /* converter name */
1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    1208, UCNV_IBM, UCNV_UTF8, /* CCSID, then presumably its platform and the converter type - confirm */
1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, /* EF BF BD is U+FFFD in UTF-8; the 3 presumably is that sequence's length - confirm */
1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Shared-data record for the UTF-8 converter. It is not declared static, so
 * unlike the two tables it points to, it is visible to other translation units.
 * It ties together the static data (_UTF8StaticData) and the function table
 * (_UTF8Impl). The initializer is positional and must follow the member order
 * of UConverterSharedData, declared elsewhere.
 */
1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _UTF8Data={
1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterSharedData), ~((uint32_t) 0), /* structure size; all-ones 32-bit value - NOTE(review): looks like a sentinel, confirm */
1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, NULL, &_UTF8StaticData, FALSE, &_UTF8Impl, /* two unused pointers, static data, a flag, function table */
1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0
1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* CESU-8 converter data ---------------------------------------------------- */
1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Function table for the CESU-8 converter.
 *
 * It reuses the four ucnv_*_UTF8 conversion functions that _UTF8Impl uses;
 * the file header states that CESU-8 shares the UTF-8 functions, with a
 * branch for supplementary code points.
 *
 * The initializer is positional and must follow the member order of
 * UConverterImpl (declared elsewhere). It stops two entries earlier than the
 * _UTF8Impl initializer, so the members that _UTF8Impl fills with
 * ucnv_UTF8FromUTF8 are implicitly zero-initialized (NULL) here.
 */
1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterImpl _CESU8Impl={
1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCNV_CESU8, /* same constant that _CESU8StaticData lists for this converter */
1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, /* next two slots: no function supplied */
1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, /* next three slots: no function supplied */
1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_toUnicode_UTF8, /* four conversion entry points shared with _UTF8Impl */
1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_fromUnicode_UTF8,
1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, /* _UTF8Impl has ucnv_getNextUChar_UTF8 at this position; none is supplied for CESU-8 */
1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, /* next four slots: no function supplied */
1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL,
1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_getCompleteUnicodeSet /* _UTF8Impl uses ucnv_getNonSurrogateUnicodeSet at this position */
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Static descriptive data for the CESU-8 converter (name, CCSID, byte-length
 * limits, substitution bytes). The initializer is positional, so the order
 * must follow the member order of UConverterStaticData, declared elsewhere.
 */
1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UConverterStaticData _CESU8StaticData={
1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterStaticData), /* size of this structure */
1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "CESU-8", /* converter name */
1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    9400, /* CCSID for CESU-8 */
1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCNV_UNKNOWN, UCNV_CESU8, 1, 3, /* same 1, 3 pair as _UTF8StaticData; UCNV_UNKNOWN sits where that table has UCNV_IBM */
1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, /* EF BF BD encodes U+FFFD; the 3 presumably is that sequence's length - confirm */
1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Shared-data record for the CESU-8 converter. It is not declared static, so
 * unlike the two tables it points to, it is visible to other translation units.
 * It ties together the static data (_CESU8StaticData) and the function table
 * (_CESU8Impl). The initializer is positional and must follow the member order
 * of UConverterSharedData, declared elsewhere.
 */
1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UConverterSharedData _CESU8Data={
1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UConverterSharedData), ~((uint32_t) 0), /* structure size; all-ones 32-bit value - NOTE(review): looks like a sentinel, confirm */
1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl, /* two unused pointers, static data, a flag, function table */
1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0
1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
1085