1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1998-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*
7* File utf8tst.c
8*
9* Modification History:
10*
11*   Date          Name        Description
12*   07/24/2000    Madhu       Creation
13*******************************************************************************
14*/
15
16#include "unicode/utypes.h"
17#include "unicode/utf8.h"
18#include "cmemory.h"
19#include "cintltst.h"
20
21/* lenient UTF-8 ------------------------------------------------------------ */
22
23/*
24 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate
25 * code points with their "natural" encoding.
26 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of
27 * single surrogates.
28 *
29 * This is not conformant with UTF-8.
30 *
31 * Supplementary code points may be encoded as pairs of 3-byte sequences, but
32 * the macros below do not attempt to assemble such pairs.
33 */
34
/*
 * L8_NEXT(s, i, length, c): "next code point" macro for lenient UTF-8.
 *
 * Reads the byte at s[i] into c and post-increments i. Then:
 * - a byte below 0x80 is the result as is;
 * - a byte for which U8_IS_LEAD() is true is passed, together with &i and
 *   length, to utf8_nextCharSafeBody(), whose return value becomes c;
 *   NOTE(review): the last argument -2 presumably selects the lenient mode
 *   that accepts surrogate code points -- confirm against
 *   utf8_nextCharSafeBody();
 * - any other byte (>=0x80 but not a lead byte) sets c to U_SENTINEL.
 *
 * i and c must be lvalues; all arguments may be evaluated more than once.
 * Every macro argument is parenthesized so that expression arguments
 * (for example a conditional expression for s) expand safely.
 */
#define L8_NEXT(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        if(U8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody((const uint8_t *)(s), &(i), (int32_t)(length), (c), -2); \
        } else { \
            (c)=U_SENTINEL; \
        } \
    } \
}
45
/*
 * L8_PREV(s, start, i, c): "previous code point" macro for lenient UTF-8.
 *
 * Pre-decrements i and reads the byte at s[i] into c. Then:
 * - a byte below 0x80 is the result as is;
 * - a byte in 0x80..0xbf is passed, together with start and &i, to
 *   utf8_prevCharSafeBody(), whose return value becomes c;
 *   NOTE(review): the last argument -2 presumably selects the lenient mode
 *   that accepts surrogate code points -- confirm against
 *   utf8_prevCharSafeBody();
 * - any byte above 0xbf sets c to U_SENTINEL.
 *
 * i and c must be lvalues; all arguments may be evaluated more than once.
 * Every macro argument is parenthesized so that expression arguments
 * expand safely.
 */
#define L8_PREV(s, start, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody((const uint8_t *)(s), (start), &(i), (c), -2); \
        } else { \
            (c)=U_SENTINEL; \
        } \
    } \
}
56
57/* -------------------------------------------------------------------------- */
58
59static void printUChars(const uint8_t *uchars, int16_t len);
60
61static void TestCodeUnitValues(void);
62static void TestCharLength(void);
63static void TestGetChar(void);
64static void TestNextPrevChar(void);
65static void TestNulTerminated(void);
66static void TestNextPrevNonCharacters(void);
67static void TestNextPrevCharUnsafe(void);
68static void TestFwdBack(void);
69static void TestFwdBackUnsafe(void);
70static void TestSetChar(void);
71static void TestSetCharUnsafe(void);
72static void TestAppendChar(void);
73static void TestAppend(void);
74static void TestSurrogates(void);
75
76void addUTF8Test(TestNode** root);
77
78void
79addUTF8Test(TestNode** root)
80{
81    addTest(root, &TestCodeUnitValues,          "utf8tst/TestCodeUnitValues");
82    addTest(root, &TestCharLength,              "utf8tst/TestCharLength");
83    addTest(root, &TestGetChar,                 "utf8tst/TestGetChar");
84    addTest(root, &TestNextPrevChar,            "utf8tst/TestNextPrevChar");
85    addTest(root, &TestNulTerminated,           "utf8tst/TestNulTerminated");
86    addTest(root, &TestNextPrevNonCharacters,   "utf8tst/TestNextPrevNonCharacters");
87    addTest(root, &TestNextPrevCharUnsafe,      "utf8tst/TestNextPrevCharUnsafe");
88    addTest(root, &TestFwdBack,                 "utf8tst/TestFwdBack");
89    addTest(root, &TestFwdBackUnsafe,           "utf8tst/TestFwdBackUnsafe");
90    addTest(root, &TestSetChar,                 "utf8tst/TestSetChar");
91    addTest(root, &TestSetCharUnsafe,           "utf8tst/TestSetCharUnsafe");
92    addTest(root, &TestAppendChar,              "utf8tst/TestAppendChar");
93    addTest(root, &TestAppend,                  "utf8tst/TestAppend");
94    addTest(root, &TestSurrogates,              "utf8tst/TestSurrogates");
95}
96
/*
 * Checks the byte classification macros UTF8_IS_SINGLE/LEAD/TRAIL and
 * U8_IS_SINGLE/LEAD/TRAIL.
 *
 * The table is laid out in groups of four: entries 0..3 must classify as
 * single bytes only, entries 4..7 as lead bytes only, and entries 8..11 as
 * trail bytes only. Both macro families must agree for a byte to pass.
 */
static void TestCodeUnitValues()
{
    static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};

    int32_t idx;

    for(idx=0; idx<UPRV_LENGTHOF(codeunit); ++idx) {
        const uint8_t unit=codeunit[idx];
        int bothSingle, eitherSingle;
        int bothLead, eitherLead;
        int bothTrail, eitherTrail;
        int ynSingle, ynLead, ynTrail;

        log_verbose("Testing code unit value of %x\n", unit);

        /* "both": old and new macro say yes; "either": at least one says yes */
        bothSingle  = UTF8_IS_SINGLE(unit) && U8_IS_SINGLE(unit);
        eitherSingle= UTF8_IS_SINGLE(unit) || U8_IS_SINGLE(unit);
        bothLead    = UTF8_IS_LEAD(unit) && U8_IS_LEAD(unit);
        eitherLead  = UTF8_IS_LEAD(unit) || U8_IS_LEAD(unit);
        bothTrail   = UTF8_IS_TRAIL(unit) && U8_IS_TRAIL(unit);
        eitherTrail = UTF8_IS_TRAIL(unit) || U8_IS_TRAIL(unit);

        /* the diagnostics report the UTF8_IS_* results only */
        ynSingle= UTF8_IS_SINGLE(unit) ? 'y' : 'n';
        ynLead  = UTF8_IS_LEAD(unit) ? 'y' : 'n';
        ynTrail = UTF8_IS_TRAIL(unit) ? 'y' : 'n';

        if(idx<4) {
            if(!bothSingle || eitherLead || eitherTrail) {
                log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
                    unit, ynSingle, ynLead, ynTrail);
            }
        } else if(idx<8) {
            if(!bothLead || eitherSingle || eitherTrail) {
                log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
                    unit, ynSingle, ynLead, ynTrail);
            }
        } else if(idx<12) {
            if(!bothTrail || eitherSingle || eitherLead) {
                log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
                    unit, ynSingle, ynLead, ynTrail);
            }
        }
    }
}
123
124static void TestCharLength()
125{
126    static const uint32_t codepoint[]={
127        1, 0x0061,
128        1, 0x007f,
129        2, 0x016f,
130        2, 0x07ff,
131        3, 0x0865,
132        3, 0x20ac,
133        4, 0x20402,
134        4, 0x23456,
135        4, 0x24506,
136        4, 0x20402,
137        4, 0x10402,
138        3, 0xd7ff,
139        3, 0xe000,
140
141    };
142
143    int16_t i;
144    UBool multiple;
145    for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
146        UChar32 c=codepoint[i+1];
147        if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){
148              log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));
149        }else{
150              log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c));
151        }
152        multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
153        if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){
154              log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
155        }
156    }
157}
158
159static void TestGetChar()
160{
161    static const uint8_t input[]={
162    /*  code unit,*/
163        0x61,
164        0x7f,
165        0xe4,
166        0xba,
167        0x8c,
168        0xF0,
169        0x90,
170        0x90,
171        0x81,
172        0xc0,
173        0x65,
174        0x31,
175        0x9a,
176        0xc9
177    };
178    static const UChar32 result[]={
179    /*  codepoint-unsafe, codepoint-safe(not strict)  codepoint-safe(strict) */
180        0x61,             0x61,                       0x61,
181        0x7f,             0x7f,                       0x7f,
182        0x4e8c,           0x4e8c,                     0x4e8c,
183        0x4e8c,           0x4e8c,                     0x4e8c ,
184        0x4e8c,           0x4e8c,                     0x4e8c,
185        0x10401,          0x10401,                    0x10401 ,
186        0x10401,          0x10401,                    0x10401 ,
187        0x10401,          0x10401,                    0x10401 ,
188        0x10401,          0x10401,                    0x10401,
189        0x25,             UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1,
190        0x65,             0x65,                       0x65,
191        0x31,             0x31,                       0x31,
192        0x31,             UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1,
193        0x240,            UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1
194    };
195    uint16_t i=0;
196    UChar32 c, expected;
197    uint32_t offset=0;
198
199    for(offset=0; offset<sizeof(input); offset++) {
200        if (offset < sizeof(input) - 1) {
201            UTF8_GET_CHAR_UNSAFE(input, offset, c);
202            if(c != result[i]){
203                log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
204
205            }
206
207            U8_GET_UNSAFE(input, offset, c);
208            if(c != result[i]){
209                log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
210
211            }
212        }
213
214        UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE);
215        expected=result[i+1];
216        if(c != expected){
217            log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
218        }
219
220        U8_GET(input, 0, offset, sizeof(input), c);
221        if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
222        if(c != expected){
223            log_err("ERROR: U8_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
224        }
225
226        U8_GET_OR_FFFD(input, 0, offset, sizeof(input), c);
227        if(expected<0) { expected=0xfffd; }
228        if(c != expected){
229            log_err("ERROR: U8_GET_OR_FFFD failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
230        }
231
232        UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE);
233        if(c != result[i+2]){
234            log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
235        }
236
237        i=(uint16_t)(i+3);
238    }
239}
240
241static void TestNextPrevChar() {
242    static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00};
243    static const UChar32 result[]={
244    /*  next_unsafe    next_safe_ns        next_safe_s          prev_unsafe   prev_safe_ns        prev_safe_s */
245        0x0061,        0x0061,             0x0061,              0x0000,       0x0000,             0x0000,
246        0x10401,       0x10401,            0x10401,             0xf0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
247        0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x2841410,    UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
248        0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xa1050,      UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
249        0x81,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x2841,       UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
250        0x00,          UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,  0x61,         0x61,               0x61,
251        0x80,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xc2,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
252        0xfd,          UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,  0x77e,        UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
253        0xbe,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xfd,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
254        0xa1,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x00,         UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
255        0x61,          0x61,               0x61,                0xc0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
256        0x81,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x10401,      0x10401,            0x10401,
257        0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x410,        UTF_ERROR_VALUE,    UTF_ERROR_VALUE,
258        0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x410,        UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
259        0x0840,        UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xf0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
260        0x0000,        0x0000,             0x0000,              0x0061,       0x0061,             0x0061
261    };
262    static const int32_t movedOffset[]={
263    /*  next_unsafe   next_safe_ns next_safe_s       prev_unsafe   prev_safe_ns      prev_safe_s */
264        1,            1,           1,                15,           15,               15,
265        5,            5,           5,                14,           14 ,              14,
266        3,            3,           3,                9,            13,               13,
267        4,            4,           4,                9,            12,               12,
268        5,            5,           5,                9,            11,               11,
269        7,            7,           7,                10,           10,               10,
270        7,            7,           7,                9,            9,                9,
271        8,            9,           9,                7,            7,                7,
272        9,            9,           9,                7,            7,                7,
273        11,           10,          10,               5,            5,                5,
274        11,           11,          11,               5,            5,                5,
275        12,           12,          12,               1,            1,                1,
276        13,           13,          13,               1,            1,                1,
277        14,           14,          14,               1,            1,                1,
278        14,           15,          15,               1,            1,                1,
279        14,           16,          16,               0,            0,                0,
280    };
281    /* TODO: remove unused columns for next_unsafe & prev_unsafe, and adjust the test code */
282
283    UChar32 c, expected;
284    uint32_t i=0;
285    uint32_t offset=0;
286    int32_t setOffset=0;
287    for(offset=0; offset<sizeof(input); offset++){
288         setOffset=offset;
289         UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE);
290         if(setOffset != movedOffset[i+1]){
291             log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
292                 offset, movedOffset[i+1], setOffset);
293         }
294        expected=result[i+1];
295        if(c != expected){
296            log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
297        }
298
299         setOffset=offset;
300         U8_NEXT(input, setOffset, sizeof(input), c);
301         if(setOffset != movedOffset[i+1]){
302             log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
303                 offset, movedOffset[i+1], setOffset);
304         }
305        if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
306        if(c != expected){
307            log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
308        }
309
310        setOffset=offset;
311        U8_NEXT_OR_FFFD(input, setOffset, sizeof(input), c);
312        if(setOffset != movedOffset[i+1]){
313            log_err("ERROR: U8_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
314                offset, movedOffset[i+1], setOffset);
315        }
316        if(expected<0) { expected=0xfffd; }
317        if(c != expected){
318            log_err("ERROR: U8_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
319        }
320
321         setOffset=offset;
322         UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE);
323         if(setOffset != movedOffset[i+1]){
324             log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
325                 offset, movedOffset[i+2], setOffset);
326         }
327         if(c != result[i+2]){
328             log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
329         }
330
331         i=i+6;
332    }
333
334    i=0;
335    for(offset=sizeof(input); offset > 0; --offset){
336         setOffset=offset;
337         UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
338         if(setOffset != movedOffset[i+4]){
339             log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
340                 offset, movedOffset[i+4], setOffset);
341         }
342        expected=result[i+4];
343        if(c != expected){
344            log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
345        }
346
347         setOffset=offset;
348         U8_PREV(input, 0, setOffset, c);
349         if(setOffset != movedOffset[i+4]){
350             log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
351                 offset, movedOffset[i+4], setOffset);
352         }
353        if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
354        if(c != expected){
355            log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
356        }
357
358        setOffset=offset;
359        U8_PREV_OR_FFFD(input, 0, setOffset, c);
360        if(setOffset != movedOffset[i+4]){
361            log_err("ERROR: U8_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
362                offset, movedOffset[i+4], setOffset);
363        }
364        if(expected<0) { expected=0xfffd; }
365        if(c != expected){
366            log_err("ERROR: U8_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
367        }
368
369         setOffset=offset;
370         UTF8_PREV_CHAR_SAFE(input, 0,  setOffset, c, TRUE);
371         if(setOffset != movedOffset[i+5]){
372             log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
373                 offset, movedOffset[i+5], setOffset);
374         }
375         if(c != result[i+5]){
376             log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
377         }
378
379         i=i+6;
380    }
381}
382
383/* keep this in sync with utf16tst.c's TestNulTerminated() */
384static void TestNulTerminated() {
385    static const uint8_t input[]={
386        /*  0 */  0x61,
387        /*  1 */  0xf0, 0x90, 0x90, 0x81,
388        /*  5 */  0xc0, 0x80,
389        /*  7 */  0xdf, 0x80,
390        /*  9 */  0xc2,
391        /* 10 */  0x62,
392        /* 11 */  0xfd, 0xbe,
393        /* 13 */  0xe0, 0xa0, 0x80,
394        /* 16 */  0xe2, 0x82, 0xac,
395        /* 19 */  0xf0, 0x90, 0x90,
396        /* 22 */  0x00
397        /* 23 */
398    };
399    static const UChar32 result[]={
400        0x61,
401        0x10401,
402        U_SENTINEL,
403        0x7c0,
404        U_SENTINEL,
405        0x62,
406        U_SENTINEL,
407        0x800,
408        0x20ac,
409        U_SENTINEL,
410        0
411    };
412
413    UChar32 c, c2, expected;
414    int32_t i0, i=0, j, k, expectedIndex;
415    int32_t cpIndex=0;
416    do {
417        i0=i;
418        U8_NEXT(input, i, -1, c);
419        expected=result[cpIndex];
420        if(c!=expected) {
421            log_err("U8_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected);
422        }
423        j=i0;
424        U8_NEXT_OR_FFFD(input, j, -1, c);
425        if(expected<0) { expected=0xfffd; }
426        if(c!=expected) {
427            log_err("U8_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected);
428        }
429        if(j!=i) {
430            log_err("U8_NEXT_OR_FFFD() moved to index %d but U8_NEXT() moved to %d\n", j, i);
431        }
432        j=i0;
433        U8_FWD_1(input, j, -1);
434        if(j!=i) {
435            log_err("U8_FWD_1() moved to index %d but U8_NEXT() moved to %d\n", j, i);
436        }
437        ++cpIndex;
438        /*
439         * Move by this many code points from the start.
440         * U8_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
441         */
442        expectedIndex= (c==0) ? i-1 : i;
443        k=0;
444        U8_FWD_N(input, k, -1, cpIndex);
445        if(k!=expectedIndex) {
446            log_err("U8_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex);
447        }
448    } while(c!=0);
449
450    i=0;
451    do {
452        j=i0=i;
453        U8_NEXT(input, i, -1, c);
454        do {
455            U8_GET(input, 0, j, -1, c2);
456            if(c2!=c) {
457                log_err("U8_NEXT(from %d)=U+%04x != U+%04x=U8_GET(at %d)\n", i0, c, c2, j);
458            }
459            U8_GET_OR_FFFD(input, 0, j, -1, c2);
460            expected= (c>=0) ? c : 0xfffd;
461            if(c2!=expected) {
462                log_err("U8_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U8_GET_OR_FFFD(at %d)\n", i0, expected, c2, j);
463            }
464            /* U8_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
465            k=j+1;
466            U8_SET_CP_LIMIT(input, 0, k, -1);
467            if(k!=i) {
468                log_err("U8_NEXT() moved to %d but U8_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k);
469            }
470        } while(++j<i);
471    } while(c!=0);
472}
473
474static void TestNextPrevNonCharacters() {
475    /* test non-characters */
476    static const uint8_t nonChars[]={
477        0xef, 0xb7, 0x90,       /* U+fdd0 */
478        0xef, 0xbf, 0xbf,       /* U+feff */
479        0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */
480        0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */
481        0xf4, 0x8f, 0xbf, 0xbe  /* U+10fffe */
482    };
483
484    UChar32 ch;
485    int32_t idx;
486
487    for(idx=0; idx<(int32_t)sizeof(nonChars);) {
488        U8_NEXT(nonChars, idx, sizeof(nonChars), ch);
489        if(!U_IS_UNICODE_NONCHAR(ch)) {
490            log_err("U8_NEXT(before %d) failed to read a non-character\n", idx);
491        }
492    }
493    for(idx=(int32_t)sizeof(nonChars); idx>0;) {
494        U8_PREV(nonChars, 0, idx, ch);
495        if(!U_IS_UNICODE_NONCHAR(ch)) {
496            log_err("U8_PREV(at %d) failed to read a non-character\n", idx);
497        }
498    }
499}
500
501static void TestNextPrevCharUnsafe() {
502    /*
503     * Use a (mostly) well-formed UTF-8 string and test at code point boundaries.
504     * The behavior of _UNSAFE macros for ill-formed strings is undefined.
505     */
506    static const uint8_t input[]={
507        0x61,
508        0xf0, 0x90, 0x90, 0x81,
509        0xc0, 0x80,  /* non-shortest form */
510        0xe2, 0x82, 0xac,
511        0xc2, 0xa1,
512        0xf4, 0x8f, 0xbf, 0xbf,
513        0x00
514    };
515    static const UChar32 codePoints[]={
516        0x61,
517        0x10401,
518        0,
519        0x20ac,
520        0xa1,
521        0x10ffff,
522        0
523    };
524
525    UChar32 c;
526    int32_t i;
527    uint32_t offset;
528    for(i=0, offset=0; offset<sizeof(input); ++i) {
529        UTF8_NEXT_CHAR_UNSAFE(input, offset, c);
530        if(c != codePoints[i]){
531            log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
532                    offset, codePoints[i], c);
533        }
534    }
535    for(i=0, offset=0; offset<sizeof(input); ++i) {
536        U8_NEXT_UNSAFE(input, offset, c);
537        if(c != codePoints[i]){
538            log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
539                    offset, codePoints[i], c);
540        }
541    }
542
543    for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
544         UTF8_PREV_CHAR_UNSAFE(input, offset, c);
545         if(c != codePoints[i]){
546             log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
547                     offset, codePoints[i], c);
548         }
549    }
550    for(i=UPRV_LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
551         U8_PREV_UNSAFE(input, offset, c);
552         if(c != codePoints[i]){
553             log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
554                     offset, codePoints[i], c);
555         }
556    }
557}
558
/*
 * Checks the safe skip macros: UTF8_FWD_1_SAFE/U8_FWD_1,
 * UTF8_BACK_1_SAFE/U8_BACK_1, UTF8_FWD_N_SAFE/U8_FWD_N and
 * UTF8_BACK_N_SAFE/U8_BACK_N.
 *
 * fwd_safe[] and back_safe[] list the expected offset after each single
 * step. Nvalue[] lists step counts that are applied cumulatively;
 * fwd_N_safe[] and back_N_safe[] list the expected offset after each one.
 *
 * Fixes relative to the earlier version:
 * - the offset is reset to 0 before the U8_FWD_1 loop; previously it was
 *   still at the end of the input after the UTF8_FWD_1_SAFE loop, so the
 *   U8_FWD_1 loop body never ran;
 * - %ld arguments are cast to long, because passing a 32-bit value for
 *   them is undefined behavior in a variadic call.
 */
static void TestFwdBack() {
    static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};
    static const uint16_t fwd_safe[]   ={1, 5, 6, 7, 9, 10, 11,  12, 13, 14, 15, 16, 17, 18};
    static const uint16_t back_safe[]  ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0};

    static const uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1, 5};
    static const uint16_t fwd_N_safe[]   ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */
    static const uint16_t back_N_safe[]  ={18, 17, 15, 12, 11, 9, 7, 0};

    uint32_t offsafe=0;

    uint32_t i=0;
    while(offsafe < sizeof(input)){
        UTF8_FWD_1_SAFE(input, offsafe, sizeof(input));
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
        }
        i++;
    }

    i=0;
    offsafe=0;  /* restart from the beginning so that U8_FWD_1 is exercised */
    while(offsafe < sizeof(input)){
        U8_FWD_1(input, offsafe, sizeof(input));
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
        }
        i++;
    }

    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
        UTF8_BACK_1_SAFE(input, 0,  offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_safe[i], offsafe);
        }
        i++;
    }

    i=0;
    offsafe=sizeof(input);
    while(offsafe > 0){
        U8_BACK_1(input, 0,  offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i], offsafe);
        }
        i++;
    }

    /* the N-step loops do not reset the offset between steps: the moves accumulate */
    offsafe=0;
    for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
        UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
        }

    }

    offsafe=0;
    for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
        U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
        }

    }

    offsafe=sizeof(input);
    for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
        UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], (long)offsafe);
        }
    }

    offsafe=sizeof(input);
    for(i=0; i<UPRV_LENGTHOF(Nvalue); i++){
        U8_BACK_N(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], (long)offsafe);
        }
    }
}
642
/*
 * Checks the unsafe skip macros (UTF8_*_UNSAFE and U8_*_UNSAFE variants of
 * FWD_1, BACK_1, FWD_N and BACK_N) against the table of code point
 * boundaries of the input string.
 */
static void TestFwdBackUnsafe() {
    /*
     * The input is (mostly) well-formed and only code point boundaries are
     * visited, because _UNSAFE macros have undefined behavior on ill-formed
     * strings.
     */
    static const uint8_t input[]={
        0x61,
        0xf0, 0x90, 0x90, 0x81,
        0xc0, 0x80,  /* non-shortest form */
        0xe2, 0x82, 0xac,
        0xc2, 0xa1,
        0xf4, 0x8f, 0xbf, 0xbf,
        0x00
    };
    static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 };

    int32_t pos;    /* byte index moved by the macros */
    int32_t b;      /* index into boundaries[], or the number of steps */

    /* single steps forward: each step must land on the next boundary */
    pos=0;
    for(b=1; pos<UPRV_LENGTHOF(input); ++b) {
        UTF8_FWD_1_UNSAFE(input, pos);
        if(pos != boundaries[b]) {
            log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[b], pos);
        }
    }
    pos=0;
    for(b=1; pos<UPRV_LENGTHOF(input); ++b) {
        U8_FWD_1_UNSAFE(input, pos);
        if(pos != boundaries[b]) {
            log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[b], pos);
        }
    }

    /* single steps backward: each step must land on the previous boundary */
    pos=UPRV_LENGTHOF(input);
    for(b=UPRV_LENGTHOF(boundaries)-2; pos>0; --b) {
        UTF8_BACK_1_UNSAFE(input, pos);
        if(pos != boundaries[b]) {
            log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[b], pos);
        }
    }
    pos=UPRV_LENGTHOF(input);
    for(b=UPRV_LENGTHOF(boundaries)-2; pos>0; --b) {
        U8_BACK_1_UNSAFE(input, pos);
        if(pos != boundaries[b]) {
            log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[b], pos);
        }
    }

    /* b steps forward from the start must reach boundaries[b] */
    for(b=0; b<UPRV_LENGTHOF(boundaries); ++b) {
        pos=0;
        UTF8_FWD_N_UNSAFE(input, pos, b);
        if(pos != boundaries[b]) {
            log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[b], pos);
        }
    }
    for(b=0; b<UPRV_LENGTHOF(boundaries); ++b) {
        pos=0;
        U8_FWD_N_UNSAFE(input, pos, b);
        if(pos != boundaries[b]) {
            log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[b], pos);
        }
    }

    /* b steps backward from the end must reach the b-th boundary from the end */
    for(b=0; b<UPRV_LENGTHOF(boundaries); ++b) {
        int32_t mirror=UPRV_LENGTHOF(boundaries)-1-b;
        pos=UPRV_LENGTHOF(input);
        UTF8_BACK_N_UNSAFE(input, pos, b);
        if(pos != boundaries[mirror]) {
            log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[mirror], pos);
        }
    }
    for(b=0; b<UPRV_LENGTHOF(boundaries); ++b) {
        int32_t mirror=UPRV_LENGTHOF(boundaries)-1-b;
        pos=UPRV_LENGTHOF(input);
        U8_BACK_N_UNSAFE(input, pos, b);
        if(pos != boundaries[mirror]) {
            log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[mirror], pos);
        }
    }
}
719
/*
 * Checks the safe boundary-adjustment macros UTF8_SET_CHAR_START_SAFE /
 * U8_SET_CP_START and UTF8_SET_CHAR_LIMIT_SAFE / U8_SET_CP_LIMIT.
 *
 * start_safe[k] and limit_safe[k] are the expected adjusted offsets for
 * the starting offset k. The START macros are tested for offsets inside
 * the input only; the LIMIT macros are also tested at the end of the input.
 *
 * The log arguments are cast to long to match %ld; passing 32-bit (or
 * promoted 16-bit) values for them is undefined behavior in a variadic
 * call.
 */
static void TestSetChar() {
    static const uint8_t input[]
        = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };
    static const int16_t start_safe[]
        = {0,    1,    1,    1,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,  14 };
    static const int16_t limit_safe[]
        = {0,    1,    4,    4,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,  14 };

    uint32_t i=0;  /* index into the expectation tables; runs in step with offset */
    int32_t offset=0, setOffset=0;
    for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){
        if (offset<UPRV_LENGTHOF(input)){
            setOffset=offset;
            UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);
            if(setOffset != start_safe[i]){
                log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                        (long)offset, (long)start_safe[i], (long)setOffset);
            }

            setOffset=offset;
            U8_SET_CP_START(input, 0, setOffset);
            if(setOffset != start_safe[i]){
                log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n",
                        (long)offset, (long)start_safe[i], (long)setOffset);
            }
        }

        setOffset=offset;
        UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input));
        if(setOffset != limit_safe[i]){
            log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                    (long)offset, (long)limit_safe[i], (long)setOffset);
        }

        setOffset=offset;
        U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input));
        if(setOffset != limit_safe[i]){
            log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n",
                    (long)offset, (long)limit_safe[i], (long)setOffset);
        }

        i++;
    }
}
760
/*
 * Checks the unsafe boundary-adjustment macros UTF8_SET_CHAR_START_UNSAFE /
 * U8_SET_CP_START_UNSAFE and UTF8_SET_CHAR_LIMIT_UNSAFE /
 * U8_SET_CP_LIMIT_UNSAFE.
 *
 * start_unsafe[k] and limit_unsafe[k] are the expected adjusted offsets
 * for the starting offset k. The START macros are tested for offsets
 * inside the input only; the LIMIT macros are tested for every offset
 * except 0.
 *
 * The log arguments are cast to long to match %ld; passing 32-bit (or
 * promoted 16-bit) values for them is undefined behavior in a variadic
 * call.
 */
static void TestSetCharUnsafe() {
    static const uint8_t input[]
        = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x80, 0x80, 0x00 };
    static const int16_t start_unsafe[]
        = {0,    1,    1,    1,    4,    5,    6,    7,    8,    9,    9,    9,    12,   12,   12,   15 };
    static const int16_t limit_unsafe[]
        = {0,    1,    4,    4,    4,    5,    6,    7,    9,    9,    10,   10,   10,   15,   15,   15,   16 };

    uint32_t i=0;  /* index into the expectation tables; runs in step with offset */
    int32_t offset=0, setOffset=0;
    for(offset=0; offset<=UPRV_LENGTHOF(input); offset++){
        if (offset<UPRV_LENGTHOF(input)){
            setOffset=offset;
            UTF8_SET_CHAR_START_UNSAFE(input, setOffset);
            if(setOffset != start_unsafe[i]){
                log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                        (long)offset, (long)start_unsafe[i], (long)setOffset);
            }

            setOffset=offset;
            U8_SET_CP_START_UNSAFE(input, setOffset);
            if(setOffset != start_unsafe[i]){
                log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                        (long)offset, (long)start_unsafe[i], (long)setOffset);
            }
        }

        /*
         * Offset 0 is skipped so that the unsafe macros cannot leave the array.
         * NOTE(review): presumably the LIMIT macros first step back one byte,
         * which at offset 0 would read before the start of the array -- confirm
         * against the macro definitions.
         */
        if (offset != 0) {
            setOffset=offset;
            UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
            if(setOffset != limit_unsafe[i]){
                log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                        (long)offset, (long)limit_unsafe[i], (long)setOffset);
            }

            setOffset=offset;
            U8_SET_CP_LIMIT_UNSAFE(input, setOffset);
            if(setOffset != limit_unsafe[i]){
                log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n",
                        (long)offset, (long)limit_unsafe[i], (long)setOffset);
            }
        }

        i++;
    }
}
803
/*
 * Exercises UTF8_APPEND_CHAR_UNSAFE and UTF8_APPEND_CHAR_SAFE.
 *
 * test[] holds (append position, code point) pairs; movedOffset[] holds the
 * offset expected after the append and result[] the expected first
 * UPRV_LENGTHOF(s) bytes of the buffer. In all three tables the first half
 * of the cases is for the unsafe macro and the second half for the safe
 * macro. Each case starts from a fresh copy of s[]. Mismatches are reported
 * through log_err().
 */
static void TestAppendChar(){
    static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
    static const uint32_t test[]={
    /*  append-position(unsafe),  CHAR to be appended */
        0,                        0x10401,
        2,                        0x0028,
        2,                        0x007f,
        3,                        0xd801,
        1,                        0x20402,
        8,                        0x10401,
        5,                        0xc0,
        5,                        0xc1,
        5,                        0xfd,
        6,                        0x80,
        6,                        0x81,
        6,                        0xbf,
        7,                        0xfe,

    /*  append-position(safe),    CHAR to be appended */
        0,                        0x10401,
        2,                        0x0028,
        3,                        0x7f,
        3,                        0xd801,   /* illegal for UTF-8 starting with Unicode 3.2 */
        1,                        0x20402,
        9,                        0x10401,
        5,                        0xc0,
        5,                        0xc1,
        5,                        0xfd,
        6,                        0x80,
        6,                        0x81,
        6,                        0xbf,
        7,                        0xfe,

    };
    static const uint16_t movedOffset[]={
    /* offset-moved-to(unsafe) */
          4,              /*for append-pos: 0 , CHAR 0x10401*/
          3,
          3,
          6,
          5,
          12,
          7,
          7,
          7,
          8,
          8,
          8,
          9,

    /* offset-moved-to(safe) */
          4,              /*for append-pos: 0, CHAR  0x10401*/
          3,
          4,
          6,
          5,
          11,
          7,
          7,
          7,
          8,
          8,
          8,
          9,

    };

    static const uint8_t result[][11]={
        /*unsafe*/
        {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
        /*safe*/
        {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /*gets UTF8_ERROR_VALUE_2 which takes 2 bytes 0xc2, 0x9f*/

        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},

    };
    /* number of bytes copied from s[] and compared against result[] */
    const uint16_t size=UPRV_LENGTHOF(s);
    /* test[] stores two values per case */
    const uint16_t caseCount=(uint16_t)(UPRV_LENGTHOF(test)/2);
    /* the tables are split evenly: unsafe cases first, then safe cases */
    const uint16_t unsafeCount=(uint16_t)(caseCount/2);
    /*
     * One byte larger than s[]: the unsafe append at position 8 is expected
     * to move the offset to 12, i.e. it writes up to str[11].
     */
    uint8_t str[12];
    uint32_t offset, c;
    uint16_t count;

    for(count=0; count<caseCount; ++count){
        uprv_memcpy(str, s, size);
        offset=test[2*count];
        c=test[2*count+1];
        if(count<unsafeCount){
            UTF8_APPEND_CHAR_UNSAFE(str, offset, c);
            if(offset != movedOffset[count]){
                /* %d requires int arguments: cast explicitly */
                log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
                    (int)count, (int)movedOffset[count], (int)offset);

            }
            if(uprv_memcmp(str, result[count], size) !=0){
                log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", (int)count);
                printUChars(result[count], (int16_t)size);
                log_err("\nGot:      ");
                printUChars(str, (int16_t)size);
                log_err("\n");
            }
        }else{
            UTF8_APPEND_CHAR_SAFE(str, offset, size, c);
            if(offset != movedOffset[count]){
                log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
                    (int)count, (int)movedOffset[count], (int)offset);

            }
            if(uprv_memcmp(str, result[count], size) !=0){
                log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", (int)count);
                printUChars(result[count], (int16_t)size);
                log_err("\nGot:     ");
                printUChars(str, (int16_t)size);
                log_err("\n");
            }
        }
    }
}
972
973static void TestAppend() {
974    static const UChar32 codePoints[]={
975        0x61, 0xdf, 0x901, 0x3040,
976        0xac00, 0xd800, 0xdbff, 0xdcde,
977        0xdffd, 0xe000, 0xffff, 0x10000,
978        0x12345, 0xe0021, 0x10ffff, 0x110000,
979        0x234567, 0x7fffffff, -1, -1000,
980        0, 0x400
981    };
982    static const uint8_t expectUnsafe[]={
983        0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
984        0xea, 0xb0, 0x80,  0xed, 0xa0, 0x80,  0xed, 0xaf, 0xbf,  0xed, 0xb3, 0x9e,
985        0xed, 0xbf, 0xbd,  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
986        0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
987        /* none from this line */
988        0,  0xd0, 0x80
989    }, expectSafe[]={
990        0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
991        0xea, 0xb0, 0x80,  /* no surrogates */
992        /* no surrogates */  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
993        0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
994        /* none from this line */
995        0,  0xd0, 0x80
996    };
997
998    uint8_t buffer[100];
999    UChar32 c;
1000    int32_t i, length;
1001    UBool isError, expectIsError, wrongIsError;
1002
1003    length=0;
1004    for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
1005        c=codePoints[i];
1006        if(c<0 || 0x10ffff<c) {
1007            continue; /* skip non-code points for U8_APPEND_UNSAFE */
1008        }
1009
1010        U8_APPEND_UNSAFE(buffer, length, c);
1011    }
1012    if(length!=UPRV_LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) {
1013        log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
1014    }
1015
1016    length=0;
1017    wrongIsError=FALSE;
1018    for(i=0; i<UPRV_LENGTHOF(codePoints); ++i) {
1019        c=codePoints[i];
1020        expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
1021        isError=FALSE;
1022
1023        U8_APPEND(buffer, length, UPRV_LENGTHOF(buffer), c, isError);
1024        wrongIsError|= isError!=expectIsError;
1025    }
1026    if(wrongIsError) {
1027        log_err("U8_APPEND did not set isError correctly\n");
1028    }
1029    if(length!=UPRV_LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) {
1030        log_err("U8_APPEND did not generate the expected output\n");
1031    }
1032}
1033
1034static void
1035TestSurrogates() {
1036    static const uint8_t b[]={
1037        0xc3, 0x9f,             /*  00DF */
1038        0xed, 0x9f, 0xbf,       /*  D7FF */
1039        0xed, 0xa0, 0x81,       /*  D801 */
1040        0xed, 0xbf, 0xbe,       /*  DFFE */
1041        0xee, 0x80, 0x80,       /*  E000 */
1042        0xf0, 0x97, 0xbf, 0xbe  /* 17FFE */
1043    };
1044    static const UChar32 cp[]={
1045        0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe
1046    };
1047
1048    UChar32 cu, cs, cl;
1049    int32_t i, j, k, iu, is, il, length;
1050
1051    k=0; /* index into cp[] */
1052    length=UPRV_LENGTHOF(b);
1053    for(i=0; i<length;) {
1054        j=i;
1055        U8_NEXT_UNSAFE(b, j, cu);
1056        iu=j;
1057
1058        j=i;
1059        U8_NEXT(b, j, length, cs);
1060        is=j;
1061
1062        j=i;
1063        L8_NEXT(b, j, length, cl);
1064        il=j;
1065
1066        if(cu!=cp[k]) {
1067            log_err("U8_NEXT_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
1068        }
1069
1070        /* U8_NEXT() returns <0 for surrogate code points */
1071        if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
1072            log_err("U8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
1073        }
1074
1075        /* L8_NEXT() returns surrogate code points like U8_NEXT_UNSAFE() */
1076        if(cl!=cu) {
1077            log_err("L8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
1078        }
1079
1080        if(is!=iu || il!=iu) {
1081            log_err("U8_NEXT(b[%ld]) or L8_NEXT(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
1082        }
1083
1084        ++k;    /* next code point */
1085        i=iu;   /* advance by one UTF-8 sequence */
1086    }
1087
1088    while(i>0) {
1089        --k; /* previous code point */
1090
1091        j=i;
1092        U8_PREV_UNSAFE(b, j, cu);
1093        iu=j;
1094
1095        j=i;
1096        U8_PREV(b, 0, j, cs);
1097        is=j;
1098
1099        j=i;
1100        L8_PREV(b, 0, j, cl);
1101        il=j;
1102
1103        if(cu!=cp[k]) {
1104            log_err("U8_PREV_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
1105        }
1106
1107        /* U8_PREV() returns <0 for surrogate code points */
1108        if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
1109            log_err("U8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
1110        }
1111
1112        /* L8_PREV() returns surrogate code points like U8_PREV_UNSAFE() */
1113        if(cl!=cu) {
1114            log_err("L8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
1115        }
1116
1117        if(is!=iu || il !=iu) {
1118            log_err("U8_PREV(b[%ld]) or L8_PREV(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
1119        }
1120
1121        i=iu;   /* go back by one UTF-8 sequence */
1122    }
1123}
1124
/*
 * Writes the first len bytes at uchars through log_err(), each formatted
 * as "0x%02x " (two hex digits followed by a space). Nothing is written
 * when len is zero or negative.
 */
static void printUChars(const uint8_t *uchars, int16_t len){
    const uint8_t *next=uchars;
    int16_t remaining;
    for(remaining=len; remaining>0; --remaining){
        log_err("0x%02x ", *next++);
    }
}
1131