1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1998-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*
7* File utf8tst.c
8*
9* Modification History:
10*
11*   Date          Name        Description
12*   07/24/2000    Madhu       Creation
13*******************************************************************************
14*/
15
16#include "unicode/utypes.h"
17#include "unicode/utf8.h"
18#include "cmemory.h"
19#include "cintltst.h"
20
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression in any context (e.g. as an operand of sizeof).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
22
23/* lenient UTF-8 ------------------------------------------------------------ */
24
25/*
26 * Lenient UTF-8 differs from conformant UTF-8 in that it allows surrogate
27 * code points with their "natural" encoding.
28 * Effectively, this allows a mix of UTF-8 and CESU-8 as well as encodings of
29 * single surrogates.
30 *
31 * This is not conformant with UTF-8.
32 *
33 * Supplementary code points may be encoded as pairs of 3-byte sequences, but
34 * the macros below do not attempt to assemble such pairs.
35 */
36
/*
 * L8_NEXT(s, i, length, c): lenient forward read of one code point.
 *
 * Reads the byte at s[i] into c and post-increments i. A byte below 0x80 is
 * returned unchanged. For a lead byte (U8_IS_LEAD) the remainder of the
 * sequence is handled by utf8_nextCharSafeBody(), which receives &i and the
 * length; any other byte >= 0x80 yields U_SENTINEL.
 *
 * The last argument (-2) is presumably the strictness selector that enables
 * the lenient behavior described in the section comment above -- confirm
 * against utf8_nextCharSafeBody().
 *
 * s, i and c are evaluated more than once; i and c must be modifiable lvalues.
 * Every macro argument is parenthesized so that expression arguments
 * (for example "buf + k") are cast and passed as a whole.
 */
#define L8_NEXT(s, i, length, c) { \
    (c)=(uint8_t)(s)[(i)++]; \
    if((c)>=0x80) { \
        if(U8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody((const uint8_t *)(s), &(i), (int32_t)(length), (c), -2); \
        } else { \
            (c)=U_SENTINEL; \
        } \
    } \
}
47
/*
 * L8_PREV(s, start, i, c): lenient backward read of one code point.
 *
 * Pre-decrements i and reads the byte at s[i] into c. A byte below 0x80 is
 * returned unchanged. A byte in 0x80..0xbf is handed to
 * utf8_prevCharSafeBody() together with start and &i; any byte above 0xbf
 * yields U_SENTINEL.
 *
 * The last argument (-2) is presumably the strictness selector that enables
 * the lenient behavior described in the section comment above -- confirm
 * against utf8_prevCharSafeBody().
 *
 * s, i and c are evaluated more than once; i and c must be modifiable lvalues.
 * Every macro argument is parenthesized so that expression arguments are
 * cast and passed as a whole.
 */
#define L8_PREV(s, start, i, c) { \
    (c)=(uint8_t)(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody((const uint8_t *)(s), (start), &(i), (c), -2); \
        } else { \
            (c)=U_SENTINEL; \
        } \
    } \
}
58
59/* -------------------------------------------------------------------------- */
60
/* Helper declared here; its definition is not within this part of the file. */
static void printUChars(const uint8_t *uchars, int16_t len);

/*
 * Test cases of this file. Each one takes no arguments, reports failures
 * through the test framework's logging calls, and is registered by
 * addUTF8Test().
 */
static void TestCodeUnitValues(void);
static void TestCharLength(void);
static void TestGetChar(void);
static void TestNextPrevChar(void);
static void TestNulTerminated(void);
static void TestNextPrevNonCharacters(void);
static void TestNextPrevCharUnsafe(void);
static void TestFwdBack(void);
static void TestFwdBackUnsafe(void);
static void TestSetChar(void);
static void TestSetCharUnsafe(void);
static void TestAppendChar(void);
static void TestAppend(void);
static void TestSurrogates(void);
77
78void addUTF8Test(TestNode** root);
79
80void
81addUTF8Test(TestNode** root)
82{
83    addTest(root, &TestCodeUnitValues,          "utf8tst/TestCodeUnitValues");
84    addTest(root, &TestCharLength,              "utf8tst/TestCharLength");
85    addTest(root, &TestGetChar,                 "utf8tst/TestGetChar");
86    addTest(root, &TestNextPrevChar,            "utf8tst/TestNextPrevChar");
87    addTest(root, &TestNulTerminated,           "utf8tst/TestNulTerminated");
88    addTest(root, &TestNextPrevNonCharacters,   "utf8tst/TestNextPrevNonCharacters");
89    addTest(root, &TestNextPrevCharUnsafe,      "utf8tst/TestNextPrevCharUnsafe");
90    addTest(root, &TestFwdBack,                 "utf8tst/TestFwdBack");
91    addTest(root, &TestFwdBackUnsafe,           "utf8tst/TestFwdBackUnsafe");
92    addTest(root, &TestSetChar,                 "utf8tst/TestSetChar");
93    addTest(root, &TestSetCharUnsafe,           "utf8tst/TestSetCharUnsafe");
94    addTest(root, &TestAppendChar,              "utf8tst/TestAppendChar");
95    addTest(root, &TestAppend,                  "utf8tst/TestAppend");
96    addTest(root, &TestSurrogates,              "utf8tst/TestSurrogates");
97}
98
99static void TestCodeUnitValues()
100{
101    static const uint8_t codeunit[]={0x00, 0x65, 0x7e, 0x7f, 0xc0, 0xc4, 0xf0, 0xfd, 0x80, 0x81, 0xbc, 0xbe,};
102
103    int16_t i;
104    for(i=0; i<LENGTHOF(codeunit); i++){
105        uint8_t c=codeunit[i];
106        log_verbose("Testing code unit value of %x\n", c);
107        if(i<4){
108            if(!UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || UTF8_IS_TRAIL(c) || !U8_IS_SINGLE(c) || U8_IS_LEAD(c) || U8_IS_TRAIL(c)){
109                log_err("ERROR: 0x%02x is a single byte but results in single: %c lead: %c trail: %c\n",
110                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
111            }
112        } else if(i< 8){
113            if(!UTF8_IS_LEAD(c) || UTF8_IS_SINGLE(c) || UTF8_IS_TRAIL(c) || !U8_IS_LEAD(c) || U8_IS_SINGLE(c) || U8_IS_TRAIL(c)){
114                log_err("ERROR: 0x%02x is a lead byte but results in single: %c lead: %c trail: %c\n",
115                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
116            }
117        } else if(i< 12){
118            if(!UTF8_IS_TRAIL(c) || UTF8_IS_SINGLE(c) || UTF8_IS_LEAD(c) || !U8_IS_TRAIL(c) || U8_IS_SINGLE(c) || U8_IS_LEAD(c)){
119                log_err("ERROR: 0x%02x is a trail byte but results in single: %c lead: %c trail: %c\n",
120                    c, UTF8_IS_SINGLE(c) ? 'y' : 'n', UTF8_IS_LEAD(c) ? 'y' : 'n', UTF8_IS_TRAIL(c) ? 'y' : 'n');
121            }
122        }
123    }
124}
125
126static void TestCharLength()
127{
128    static const uint32_t codepoint[]={
129        1, 0x0061,
130        1, 0x007f,
131        2, 0x016f,
132        2, 0x07ff,
133        3, 0x0865,
134        3, 0x20ac,
135        4, 0x20402,
136        4, 0x23456,
137        4, 0x24506,
138        4, 0x20402,
139        4, 0x10402,
140        3, 0xd7ff,
141        3, 0xe000,
142
143    };
144
145    int16_t i;
146    UBool multiple;
147    for(i=0; i<LENGTHOF(codepoint); i=(int16_t)(i+2)){
148        UChar32 c=codepoint[i+1];
149        if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){
150              log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c));
151        }else{
152              log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c));
153        }
154        multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
155        if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){
156              log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c);
157        }
158    }
159}
160
161static void TestGetChar()
162{
163    static const uint8_t input[]={
164    /*  code unit,*/
165        0x61,
166        0x7f,
167        0xe4,
168        0xba,
169        0x8c,
170        0xF0,
171        0x90,
172        0x90,
173        0x81,
174        0xc0,
175        0x65,
176        0x31,
177        0x9a,
178        0xc9
179    };
180    static const UChar32 result[]={
181    /*  codepoint-unsafe, codepoint-safe(not strict)  codepoint-safe(strict) */
182        0x61,             0x61,                       0x61,
183        0x7f,             0x7f,                       0x7f,
184        0x4e8c,           0x4e8c,                     0x4e8c,
185        0x4e8c,           0x4e8c,                     0x4e8c ,
186        0x4e8c,           0x4e8c,                     0x4e8c,
187        0x10401,          0x10401,                    0x10401 ,
188        0x10401,          0x10401,                    0x10401 ,
189        0x10401,          0x10401,                    0x10401 ,
190        0x10401,          0x10401,                    0x10401,
191        0x25,             UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1,
192        0x65,             0x65,                       0x65,
193        0x31,             0x31,                       0x31,
194        0x31,             UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1,
195        0x240,            UTF8_ERROR_VALUE_1,         UTF8_ERROR_VALUE_1
196    };
197    uint16_t i=0;
198    UChar32 c, expected;
199    uint32_t offset=0;
200
201    for(offset=0; offset<sizeof(input); offset++) {
202        if (offset < sizeof(input) - 1) {
203            UTF8_GET_CHAR_UNSAFE(input, offset, c);
204            if(c != result[i]){
205                log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
206
207            }
208
209            U8_GET_UNSAFE(input, offset, c);
210            if(c != result[i]){
211                log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
212
213            }
214        }
215
216        UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE);
217        expected=result[i+1];
218        if(c != expected){
219            log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
220        }
221
222        U8_GET(input, 0, offset, sizeof(input), c);
223        if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
224        if(c != expected){
225            log_err("ERROR: U8_GET failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
226        }
227
228        U8_GET_OR_FFFD(input, 0, offset, sizeof(input), c);
229        if(expected<0) { expected=0xfffd; }
230        if(c != expected){
231            log_err("ERROR: U8_GET_OR_FFFD failed for offset=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
232        }
233
234        UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE);
235        if(c != result[i+2]){
236            log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
237        }
238
239        i=(uint16_t)(i+3);
240    }
241}
242
243static void TestNextPrevChar() {
244    static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00};
245    static const UChar32 result[]={
246    /*  next_unsafe    next_safe_ns        next_safe_s          prev_unsafe   prev_safe_ns        prev_safe_s */
247        0x0061,        0x0061,             0x0061,              0x0000,       0x0000,             0x0000,
248        0x10401,       0x10401,            0x10401,             0xf0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
249        0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x2841410,    UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
250        0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xa1050,      UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
251        0x81,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x2841,       UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
252        0x00,          UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,  0x61,         0x61,               0x61,
253        0x80,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xc2,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
254        0xfd,          UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,  0x77e,        UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
255        0xbe,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xfd,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
256        0xa1,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x00,         UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
257        0x61,          0x61,               0x61,                0xc0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
258        0x81,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x10401,      0x10401,            0x10401,
259        0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x410,        UTF_ERROR_VALUE,    UTF_ERROR_VALUE,
260        0x90,          UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0x410,        UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2,
261        0x0840,        UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,  0xf0,         UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1,
262        0x0000,        0x0000,             0x0000,              0x0061,       0x0061,             0x0061
263    };
264    static const int32_t movedOffset[]={
265    /*  next_unsafe   next_safe_ns next_safe_s       prev_unsafe   prev_safe_ns      prev_safe_s */
266        1,            1,           1,                15,           15,               15,
267        5,            5,           5,                14,           14 ,              14,
268        3,            3,           3,                9,            13,               13,
269        4,            4,           4,                9,            12,               12,
270        5,            5,           5,                9,            11,               11,
271        7,            7,           7,                10,           10,               10,
272        7,            7,           7,                9,            9,                9,
273        8,            9,           9,                7,            7,                7,
274        9,            9,           9,                7,            7,                7,
275        11,           10,          10,               5,            5,                5,
276        11,           11,          11,               5,            5,                5,
277        12,           12,          12,               1,            1,                1,
278        13,           13,          13,               1,            1,                1,
279        14,           14,          14,               1,            1,                1,
280        14,           15,          15,               1,            1,                1,
281        14,           16,          16,               0,            0,                0,
282    };
283    /* TODO: remove unused columns for next_unsafe & prev_unsafe, and adjust the test code */
284
285    UChar32 c, expected;
286    uint32_t i=0;
287    uint32_t offset=0;
288    int32_t setOffset=0;
289    for(offset=0; offset<sizeof(input); offset++){
290         setOffset=offset;
291         UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE);
292         if(setOffset != movedOffset[i+1]){
293             log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
294                 offset, movedOffset[i+1], setOffset);
295         }
296        expected=result[i+1];
297        if(c != expected){
298            log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
299        }
300
301         setOffset=offset;
302         U8_NEXT(input, setOffset, sizeof(input), c);
303         if(setOffset != movedOffset[i+1]){
304             log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
305                 offset, movedOffset[i+1], setOffset);
306         }
307        if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
308        if(c != expected){
309            log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
310        }
311
312        setOffset=offset;
313        U8_NEXT_OR_FFFD(input, setOffset, sizeof(input), c);
314        if(setOffset != movedOffset[i+1]){
315            log_err("ERROR: U8_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
316                offset, movedOffset[i+1], setOffset);
317        }
318        if(expected<0) { expected=0xfffd; }
319        if(c != expected){
320            log_err("ERROR: U8_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
321        }
322
323         setOffset=offset;
324         UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE);
325         if(setOffset != movedOffset[i+1]){
326             log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
327                 offset, movedOffset[i+2], setOffset);
328         }
329         if(c != result[i+2]){
330             log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
331         }
332
333         i=i+6;
334    }
335
336    i=0;
337    for(offset=sizeof(input); offset > 0; --offset){
338         setOffset=offset;
339         UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
340         if(setOffset != movedOffset[i+4]){
341             log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
342                 offset, movedOffset[i+4], setOffset);
343         }
344        expected=result[i+4];
345        if(c != expected){
346            log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
347        }
348
349         setOffset=offset;
350         U8_PREV(input, 0, setOffset, c);
351         if(setOffset != movedOffset[i+4]){
352             log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
353                 offset, movedOffset[i+4], setOffset);
354         }
355        if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; }
356        if(c != expected){
357            log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
358        }
359
360        setOffset=offset;
361        U8_PREV_OR_FFFD(input, 0, setOffset, c);
362        if(setOffset != movedOffset[i+4]){
363            log_err("ERROR: U8_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
364                offset, movedOffset[i+4], setOffset);
365        }
366        if(expected<0) { expected=0xfffd; }
367        if(c != expected){
368            log_err("ERROR: U8_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c);
369        }
370
371         setOffset=offset;
372         UTF8_PREV_CHAR_SAFE(input, 0,  setOffset, c, TRUE);
373         if(setOffset != movedOffset[i+5]){
374             log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
375                 offset, movedOffset[i+5], setOffset);
376         }
377         if(c != result[i+5]){
378             log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
379         }
380
381         i=i+6;
382    }
383}
384
/* keep this in sync with utf16tst.c's TestNulTerminated() */
/*
 * Calls the safe U8_ macros with a length argument of -1 on a byte string
 * that ends with a 0x00 byte, and cross-checks them against each other.
 * Both loops run until U8_NEXT returns 0.
 */
static void TestNulTerminated() {
    /* the leading comments give the byte index at which each sequence starts */
    static const uint8_t input[]={
        /*  0 */  0x61,
        /*  1 */  0xf0, 0x90, 0x90, 0x81,
        /*  5 */  0xc0, 0x80,
        /*  7 */  0xdf, 0x80,
        /*  9 */  0xc2,
        /* 10 */  0x62,
        /* 11 */  0xfd, 0xbe,
        /* 13 */  0xe0, 0xa0, 0x80,
        /* 16 */  0xe2, 0x82, 0xac,
        /* 19 */  0xf0, 0x90, 0x90,
        /* 22 */  0x00
        /* 23 */
    };
    /* expected U8_NEXT results, one per step; U_SENTINEL where no code point is expected */
    static const UChar32 result[]={
        0x61,
        0x10401,
        U_SENTINEL,
        0x7c0,
        U_SENTINEL,
        0x62,
        U_SENTINEL,
        0x800,
        0x20ac,
        U_SENTINEL,
        0
    };

    UChar32 c, c2, expected;
    int32_t i0, i=0, j, k, expectedIndex;
    int32_t cpIndex=0;
    /*
     * Pass 1: step through the string with U8_NEXT and check, for each step,
     * that U8_NEXT_OR_FFFD, U8_FWD_1 and U8_FWD_N agree with it.
     * i0 is the index before the step, i the index after it.
     */
    do {
        i0=i;
        U8_NEXT(input, i, -1, c);
        expected=result[cpIndex];
        if(c!=expected) {
            log_err("U8_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, expected);
        }
        /* same step with U8_NEXT_OR_FFFD: U+FFFD instead of a negative value, same end index */
        j=i0;
        U8_NEXT_OR_FFFD(input, j, -1, c);
        if(expected<0) { expected=0xfffd; }
        if(c!=expected) {
            log_err("U8_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x\n", i0, c, expected);
        }
        if(j!=i) {
            log_err("U8_NEXT_OR_FFFD() moved to index %d but U8_NEXT() moved to %d\n", j, i);
        }
        /* same step with U8_FWD_1: same end index */
        j=i0;
        U8_FWD_1(input, j, -1);
        if(j!=i) {
            log_err("U8_FWD_1() moved to index %d but U8_NEXT() moved to %d\n", j, i);
        }
        ++cpIndex;
        /*
         * Move by this many code points from the start.
         * U8_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
         */
        expectedIndex= (c==0) ? i-1 : i;
        k=0;
        U8_FWD_N(input, k, -1, cpIndex);
        if(k!=expectedIndex) {
            log_err("U8_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex);
        }
    } while(c!=0);

    /*
     * Pass 2: for every step [i0, i) found by U8_NEXT, check the random-access
     * macros at each byte index j inside that range.
     */
    i=0;
    do {
        j=i0=i;
        U8_NEXT(input, i, -1, c);
        do {
            /* U8_GET at any index within the step must return what U8_NEXT returned */
            U8_GET(input, 0, j, -1, c2);
            if(c2!=c) {
                log_err("U8_NEXT(from %d)=U+%04x != U+%04x=U8_GET(at %d)\n", i0, c, c2, j);
            }
            /* U8_GET_OR_FFFD likewise, with U+FFFD in place of a negative value */
            U8_GET_OR_FFFD(input, 0, j, -1, c2);
            expected= (c>=0) ? c : 0xfffd;
            if(c2!=expected) {
                log_err("U8_NEXT_OR_FFFD(from %d)=U+%04x != U+%04x=U8_GET_OR_FFFD(at %d)\n", i0, expected, c2, j);
            }
            /* U8_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
            k=j+1;
            U8_SET_CP_LIMIT(input, 0, k, -1);
            if(k!=i) {
                log_err("U8_NEXT() moved to %d but U8_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k);
            }
        } while(++j<i);
    } while(c!=0);
}
475
476static void TestNextPrevNonCharacters() {
477    /* test non-characters */
478    static const uint8_t nonChars[]={
479        0xef, 0xb7, 0x90,       /* U+fdd0 */
480        0xef, 0xbf, 0xbf,       /* U+feff */
481        0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */
482        0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */
483        0xf4, 0x8f, 0xbf, 0xbe  /* U+10fffe */
484    };
485
486    UChar32 ch;
487    int32_t idx;
488
489    for(idx=0; idx<(int32_t)sizeof(nonChars);) {
490        U8_NEXT(nonChars, idx, sizeof(nonChars), ch);
491        if(!U_IS_UNICODE_NONCHAR(ch)) {
492            log_err("U8_NEXT(before %d) failed to read a non-character\n", idx);
493        }
494    }
495    for(idx=(int32_t)sizeof(nonChars); idx>0;) {
496        U8_PREV(nonChars, 0, idx, ch);
497        if(!U_IS_UNICODE_NONCHAR(ch)) {
498            log_err("U8_PREV(at %d) failed to read a non-character\n", idx);
499        }
500    }
501}
502
503static void TestNextPrevCharUnsafe() {
504    /*
505     * Use a (mostly) well-formed UTF-8 string and test at code point boundaries.
506     * The behavior of _UNSAFE macros for ill-formed strings is undefined.
507     */
508    static const uint8_t input[]={
509        0x61,
510        0xf0, 0x90, 0x90, 0x81,
511        0xc0, 0x80,  /* non-shortest form */
512        0xe2, 0x82, 0xac,
513        0xc2, 0xa1,
514        0xf4, 0x8f, 0xbf, 0xbf,
515        0x00
516    };
517    static const UChar32 codePoints[]={
518        0x61,
519        0x10401,
520        0,
521        0x20ac,
522        0xa1,
523        0x10ffff,
524        0
525    };
526
527    UChar32 c;
528    int32_t i;
529    uint32_t offset;
530    for(i=0, offset=0; offset<sizeof(input); ++i) {
531        UTF8_NEXT_CHAR_UNSAFE(input, offset, c);
532        if(c != codePoints[i]){
533            log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
534                    offset, codePoints[i], c);
535        }
536    }
537    for(i=0, offset=0; offset<sizeof(input); ++i) {
538        U8_NEXT_UNSAFE(input, offset, c);
539        if(c != codePoints[i]){
540            log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
541                    offset, codePoints[i], c);
542        }
543    }
544
545    for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
546         UTF8_PREV_CHAR_UNSAFE(input, offset, c);
547         if(c != codePoints[i]){
548             log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
549                     offset, codePoints[i], c);
550         }
551    }
552    for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
553         U8_PREV_UNSAFE(input, offset, c);
554         if(c != codePoints[i]){
555             log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
556                     offset, codePoints[i], c);
557         }
558    }
559}
560
561static void TestFwdBack() {
562    static const uint8_t input[]={0x61, 0xF0, 0x90, 0x90, 0x81, 0xff, 0x62, 0xc0, 0x80, 0x7f, 0x8f, 0xc0, 0x63, 0x81, 0x90, 0x90, 0xF0, 0x00};
563    static const uint16_t fwd_safe[]   ={1, 5, 6, 7, 9, 10, 11,  12, 13, 14, 15, 16, 17, 18};
564    static const uint16_t back_safe[]  ={17, 16, 15, 14, 13, 12, 11, 10, 9, 7, 6, 5, 1, 0};
565
566    static const uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1, 5};
567    static const uint16_t fwd_N_safe[]   ={0, 1, 6, 10, 11, 13, 14, 18}; /*safe macro keeps it at the end of the string */
568    static const uint16_t back_N_safe[]  ={18, 17, 15, 12, 11, 9, 7, 0};
569
570    uint32_t offsafe=0;
571
572    uint32_t i=0;
573    while(offsafe < sizeof(input)){
574        UTF8_FWD_1_SAFE(input, offsafe, sizeof(input));
575        if(offsafe != fwd_safe[i]){
576            log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
577        }
578        i++;
579    }
580
581    i=0;
582    while(offsafe < sizeof(input)){
583        U8_FWD_1(input, offsafe, sizeof(input));
584        if(offsafe != fwd_safe[i]){
585            log_err("ERROR: U8_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
586        }
587        i++;
588    }
589
590    i=0;
591    offsafe=sizeof(input);
592    while(offsafe > 0){
593        UTF8_BACK_1_SAFE(input, 0,  offsafe);
594        if(offsafe != back_safe[i]){
595            log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_safe[i], offsafe);
596        }
597        i++;
598    }
599
600    i=0;
601    offsafe=sizeof(input);
602    while(offsafe > 0){
603        U8_BACK_1(input, 0,  offsafe);
604        if(offsafe != back_safe[i]){
605            log_err("ERROR: U8_BACK_1 offset expected:%d, Got:%d\n", back_safe[i], offsafe);
606        }
607        i++;
608    }
609
610    offsafe=0;
611    for(i=0; i<LENGTHOF(Nvalue); i++){
612        UTF8_FWD_N_SAFE(input, offsafe, sizeof(input), Nvalue[i]);
613        if(offsafe != fwd_N_safe[i]){
614            log_err("ERROR: Forward_N_safe offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
615        }
616
617    }
618
619    offsafe=0;
620    for(i=0; i<LENGTHOF(Nvalue); i++){
621        U8_FWD_N(input, offsafe, sizeof(input), Nvalue[i]);
622        if(offsafe != fwd_N_safe[i]){
623            log_err("ERROR: U8_FWD_N offset=%d expected:%d, Got:%d\n", i, fwd_N_safe[i], offsafe);
624        }
625
626    }
627
628    offsafe=sizeof(input);
629    for(i=0; i<LENGTHOF(Nvalue); i++){
630        UTF8_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
631        if(offsafe != back_N_safe[i]){
632            log_err("ERROR: backward_N_safe offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
633        }
634    }
635
636    offsafe=sizeof(input);
637    for(i=0; i<LENGTHOF(Nvalue); i++){
638        U8_BACK_N(input, 0, offsafe, Nvalue[i]);
639        if(offsafe != back_N_safe[i]){
640            log_err("ERROR: U8_BACK_N offset=%d expected:%d, Got:%ld\n", i, back_N_safe[i], offsafe);
641        }
642    }
643}
644
645static void TestFwdBackUnsafe() {
646    /*
647     * Use a (mostly) well-formed UTF-8 string and test at code point boundaries.
648     * The behavior of _UNSAFE macros for ill-formed strings is undefined.
649     */
650    static const uint8_t input[]={
651        0x61,
652        0xf0, 0x90, 0x90, 0x81,
653        0xc0, 0x80,  /* non-shortest form */
654        0xe2, 0x82, 0xac,
655        0xc2, 0xa1,
656        0xf4, 0x8f, 0xbf, 0xbf,
657        0x00
658    };
659    static const int8_t boundaries[]={ 0, 1, 5, 7, 10, 12, 16, 17 };
660
661    int32_t offset;
662    int32_t i;
663    for(i=1, offset=0; offset<LENGTHOF(input); ++i) {
664        UTF8_FWD_1_UNSAFE(input, offset);
665        if(offset != boundaries[i]){
666            log_err("ERROR: UTF8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
667        }
668    }
669    for(i=1, offset=0; offset<LENGTHOF(input); ++i) {
670        U8_FWD_1_UNSAFE(input, offset);
671        if(offset != boundaries[i]){
672            log_err("ERROR: U8_FWD_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
673        }
674    }
675
676    for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) {
677        UTF8_BACK_1_UNSAFE(input, offset);
678        if(offset != boundaries[i]){
679            log_err("ERROR: UTF8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
680        }
681    }
682    for(i=LENGTHOF(boundaries)-2, offset=LENGTHOF(input); offset>0; --i) {
683        U8_BACK_1_UNSAFE(input, offset);
684        if(offset != boundaries[i]){
685            log_err("ERROR: U8_BACK_1_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
686        }
687    }
688
689    for(i=0; i<LENGTHOF(boundaries); ++i) {
690        offset=0;
691        UTF8_FWD_N_UNSAFE(input, offset, i);
692        if(offset != boundaries[i]) {
693            log_err("ERROR: UTF8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
694        }
695    }
696    for(i=0; i<LENGTHOF(boundaries); ++i) {
697        offset=0;
698        U8_FWD_N_UNSAFE(input, offset, i);
699        if(offset != boundaries[i]) {
700            log_err("ERROR: U8_FWD_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[i], offset);
701        }
702    }
703
704    for(i=0; i<LENGTHOF(boundaries); ++i) {
705        int32_t j=LENGTHOF(boundaries)-1-i;
706        offset=LENGTHOF(input);
707        UTF8_BACK_N_UNSAFE(input, offset, i);
708        if(offset != boundaries[j]) {
709            log_err("ERROR: UTF8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[j], offset);
710        }
711    }
712    for(i=0; i<LENGTHOF(boundaries); ++i) {
713        int32_t j=LENGTHOF(boundaries)-1-i;
714        offset=LENGTHOF(input);
715        U8_BACK_N_UNSAFE(input, offset, i);
716        if(offset != boundaries[j]) {
717            log_err("ERROR: U8_BACK_N_UNSAFE offset expected:%d, Got:%d\n", boundaries[j], offset);
718        }
719    }
720}
721
722static void TestSetChar() {
723    static const uint8_t input[]
724        = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0xfe, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x00 };
725    static const int16_t start_safe[]
726        = {0,    1,    1,    1,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,  14 };
727    static const int16_t limit_safe[]
728        = {0,    1,    4,    4,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,  14 };
729
730    uint32_t i=0;
731    int32_t offset=0, setOffset=0;
732    for(offset=0; offset<=LENGTHOF(input); offset++){
733        if (offset<LENGTHOF(input)){
734            setOffset=offset;
735            UTF8_SET_CHAR_START_SAFE(input, 0, setOffset);
736            if(setOffset != start_safe[i]){
737                log_err("ERROR: UTF8_SET_CHAR_START_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
738            }
739
740            setOffset=offset;
741            U8_SET_CP_START(input, 0, setOffset);
742            if(setOffset != start_safe[i]){
743                log_err("ERROR: U8_SET_CP_START failed for offset=%ld. Expected:%ld Got:%ld\n", offset, start_safe[i], setOffset);
744            }
745        }
746
747        setOffset=offset;
748        UTF8_SET_CHAR_LIMIT_SAFE(input,0, setOffset, sizeof(input));
749        if(setOffset != limit_safe[i]){
750            log_err("ERROR: UTF8_SET_CHAR_LIMIT_SAFE failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
751        }
752
753        setOffset=offset;
754        U8_SET_CP_LIMIT(input,0, setOffset, sizeof(input));
755        if(setOffset != limit_safe[i]){
756            log_err("ERROR: U8_SET_CP_LIMIT failed for offset=%ld. Expected:%ld Got:%ld\n", offset, limit_safe[i], setOffset);
757        }
758
759        i++;
760    }
761}
762
/*
 * Checks that the unchecked "set to code point boundary" macros move an
 * arbitrary byte offset within input[] to the expected position.
 *
 * For every offset that indexes a byte (offset<length) the START macros
 * (UTF8_SET_CHAR_START_UNSAFE, U8_SET_CP_START_UNSAFE) are compared with
 * start_unsafe[]; for every offset in 1..length the LIMIT macros
 * (UTF8_SET_CHAR_LIMIT_UNSAFE, U8_SET_CP_LIMIT_UNSAFE) are compared with
 * limit_unsafe[].
 * Each mismatch is reported through log_err().
 */
static void TestSetCharUnsafe() {
    static const uint8_t input[]
        = {0x61, 0xe4, 0xba, 0x8c, 0x7f, 0x2e, 0x62, 0xc5, 0x7f, 0x61, 0x80, 0x80, 0xe0, 0x80, 0x80, 0x00 };
    static const int16_t start_unsafe[]
        = {0,    1,    1,    1,    4,    5,    6,    7,    8,    9,    9,    9,    12,   12,   12,   15 };
    static const int16_t limit_unsafe[]
        = {0,    1,    4,    4,    4,    5,    6,    7,    9,    9,    10,   10,   10,   15,   15,   15,   16 };

    const int32_t length=LENGTHOF(input);
    int32_t offset, setOffset;

    /* The expectation tables are indexed directly by the offset under test. */
    for(offset=0; offset<=length; offset++){
        if (offset<length){
            setOffset=offset;
            UTF8_SET_CHAR_START_UNSAFE(input, setOffset);
            if(setOffset != start_unsafe[offset]){
                /* log_err() is variadic: cast so the arguments really are long for %ld. */
                log_err("ERROR: UTF8_SET_CHAR_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", (long)offset, (long)start_unsafe[offset], (long)setOffset);
            }

            setOffset=offset;
            U8_SET_CP_START_UNSAFE(input, setOffset);
            if(setOffset != start_unsafe[offset]){
                log_err("ERROR: U8_SET_CP_START_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", (long)offset, (long)start_unsafe[offset], (long)setOffset);
            }
        }

        /*
         * Offset 0 is skipped for the LIMIT macros so that they cannot run
         * outside the array (presumably they inspect the byte before the
         * offset, and there is none at 0 - confirm against utf8.h).
         */
        if (offset != 0) {
            setOffset=offset;
            UTF8_SET_CHAR_LIMIT_UNSAFE(input, setOffset);
            if(setOffset != limit_unsafe[offset]){
                log_err("ERROR: UTF8_SET_CHAR_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", (long)offset, (long)limit_unsafe[offset], (long)setOffset);
            }

            setOffset=offset;
            U8_SET_CP_LIMIT_UNSAFE(input, setOffset);
            if(setOffset != limit_unsafe[offset]){
                log_err("ERROR: U8_SET_CP_LIMIT_UNSAFE failed for offset=%ld. Expected:%ld Got:%ld\n", (long)offset, (long)limit_unsafe[offset], (long)setOffset);
            }
        }
    }
}
805
/*
 * Checks UTF8_APPEND_CHAR_UNSAFE and UTF8_APPEND_CHAR_SAFE.
 *
 * Each test case restores the work buffer from s[], writes one code point at
 * a given position and then verifies
 *   - the position the macro advanced the offset to (movedOffset[]), and
 *   - the first LENGTHOF(s) bytes of the buffer (result[]).
 * test[] holds (position, code point) pairs; the first UNSAFE_CASE_COUNT
 * pairs go through the unsafe macro, the remaining ones through the safe
 * macro. movedOffset[] and result[] are laid out in the same order.
 * Each mismatch is reported through log_err().
 */
static void TestAppendChar(){
    /* Number of leading test cases that use the unsafe macro. */
    enum { UNSAFE_CASE_COUNT=13 };

    static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
    static const uint32_t test[]={
    /*  append-position(unsafe),  CHAR to be appended */
        0,                        0x10401,
        2,                        0x0028,
        2,                        0x007f,
        3,                        0xd801,
        1,                        0x20402,
        8,                        0x10401,
        5,                        0xc0,
        5,                        0xc1,
        5,                        0xfd,
        6,                        0x80,
        6,                        0x81,
        6,                        0xbf,
        7,                        0xfe,

    /*  append-position(safe),    CHAR to be appended */
        0,                        0x10401,
        2,                        0x0028,
        3,                        0x7f,
        3,                        0xd801,   /* illegal for UTF-8 starting with Unicode 3.2 */
        1,                        0x20402,
        9,                        0x10401,
        5,                        0xc0,
        5,                        0xc1,
        5,                        0xfd,
        6,                        0x80,
        6,                        0x81,
        6,                        0xbf,
        7,                        0xfe,

    };
    static const uint16_t movedOffset[]={
    /* offset-moved-to(unsafe) */
          4,              /*for append-pos: 0 , CHAR 0x10401*/
          3,
          3,
          6,
          5,
          12,
          7,
          7,
          7,
          8,
          8,
          8,
          9,

    /* offset-moved-to(safe) */
          4,              /*for append-pos: 0, CHAR  0x10401*/
          3,
          4,
          6,
          5,
          11,
          7,
          7,
          7,
          8,
          8,
          8,
          9,

    };

    static const uint8_t result[][11]={
        /*unsafe*/
        {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x7f, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0xed, 0xa0, 0x81, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0xF0, 0x90, 0x90},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},
        /*safe*/
        {0xF0, 0x90, 0x90, 0x81, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x28, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x7f, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0xef, 0xbf, 0xbf, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0xF0, 0xa0, 0x90, 0x82, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xc2, 0x9f}, /*gets UTF8_ERROR_VALUE_2 which takes 2 bytes 0xc2, 0x9f*/

        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x80, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0x81, 0x68, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0xc3, 0xbd, 0x68, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x80, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0x81, 0x69, 0x6a, 0x00},
        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0xc2, 0xbf, 0x69, 0x6a, 0x00},

        {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xc3, 0xbe, 0x6a, 0x00},

    };

    /*
     * One byte larger than s[]: the unsafe case that appends a 4-byte
     * sequence at position 8 moves the offset to 12 (see movedOffset[]).
     */
    uint8_t str[12];
    uint32_t offset;
    int32_t count;
    const int32_t pairCount=LENGTHOF(test)/2;
    const int16_t size=(int16_t)LENGTHOF(s);

    for(count=0; count<pairCount; ++count){
        const uint32_t c=test[2*count+1];

        /* Start every case from a pristine copy of the template. */
        uprv_memcpy(str, s, size);
        offset=test[2*count];

        if(count<UNSAFE_CASE_COUNT){
            UTF8_APPEND_CHAR_UNSAFE(str, offset, c);
            if(offset != movedOffset[count]){
                /* log_err() is variadic: cast so the arguments really are int for %d. */
                log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
                    (int)count, (int)movedOffset[count], (int)offset);

            }
            if(uprv_memcmp(str, result[count], size) !=0){
                log_err("ERROR: UTF8_APPEND_CHAR_UNSAFE failed for count=%d. \nExpected:", (int)count);
                printUChars(result[count], size);
                log_err("\nGot:      ");
                printUChars(str, size);
                log_err("\n");
            }
        }else{
            UTF8_APPEND_CHAR_SAFE(str, offset, size, c);
            if(offset != movedOffset[count]){
                log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed to move the offset correctly for count=%d.\nExpectedOffset=%d  currentOffset=%d\n",
                    (int)count, (int)movedOffset[count], (int)offset);

            }
            if(uprv_memcmp(str, result[count], size) !=0){
                log_err("ERROR: UTF8_APPEND_CHAR_SAFE failed for count=%d. \nExpected:", (int)count);
                printUChars(result[count], size);
                log_err("\nGot:     ");
                printUChars(str, size);
                log_err("\n");
            }
        }
    }
}
974
975static void TestAppend() {
976    static const UChar32 codePoints[]={
977        0x61, 0xdf, 0x901, 0x3040,
978        0xac00, 0xd800, 0xdbff, 0xdcde,
979        0xdffd, 0xe000, 0xffff, 0x10000,
980        0x12345, 0xe0021, 0x10ffff, 0x110000,
981        0x234567, 0x7fffffff, -1, -1000,
982        0, 0x400
983    };
984    static const uint8_t expectUnsafe[]={
985        0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
986        0xea, 0xb0, 0x80,  0xed, 0xa0, 0x80,  0xed, 0xaf, 0xbf,  0xed, 0xb3, 0x9e,
987        0xed, 0xbf, 0xbd,  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
988        0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
989        /* none from this line */
990        0,  0xd0, 0x80
991    }, expectSafe[]={
992        0x61,  0xc3, 0x9f,  0xe0, 0xa4, 0x81,  0xe3, 0x81, 0x80,
993        0xea, 0xb0, 0x80,  /* no surrogates */
994        /* no surrogates */  0xee, 0x80, 0x80,  0xef, 0xbf, 0xbf,  0xf0, 0x90, 0x80, 0x80,
995        0xf0, 0x92, 0x8d, 0x85,  0xf3, 0xa0, 0x80, 0xa1,  0xf4, 0x8f, 0xbf, 0xbf,  /* not 0x110000 */
996        /* none from this line */
997        0,  0xd0, 0x80
998    };
999
1000    uint8_t buffer[100];
1001    UChar32 c;
1002    int32_t i, length;
1003    UBool isError, expectIsError, wrongIsError;
1004
1005    length=0;
1006    for(i=0; i<LENGTHOF(codePoints); ++i) {
1007        c=codePoints[i];
1008        if(c<0 || 0x10ffff<c) {
1009            continue; /* skip non-code points for U8_APPEND_UNSAFE */
1010        }
1011
1012        U8_APPEND_UNSAFE(buffer, length, c);
1013    }
1014    if(length!=LENGTHOF(expectUnsafe) || 0!=memcmp(buffer, expectUnsafe, length)) {
1015        log_err("U8_APPEND_UNSAFE did not generate the expected output\n");
1016    }
1017
1018    length=0;
1019    wrongIsError=FALSE;
1020    for(i=0; i<LENGTHOF(codePoints); ++i) {
1021        c=codePoints[i];
1022        expectIsError= c<0 || 0x10ffff<c || U_IS_SURROGATE(c);
1023        isError=FALSE;
1024
1025        U8_APPEND(buffer, length, LENGTHOF(buffer), c, isError);
1026        wrongIsError|= isError!=expectIsError;
1027    }
1028    if(wrongIsError) {
1029        log_err("U8_APPEND did not set isError correctly\n");
1030    }
1031    if(length!=LENGTHOF(expectSafe) || 0!=memcmp(buffer, expectSafe, length)) {
1032        log_err("U8_APPEND did not generate the expected output\n");
1033    }
1034}
1035
1036static void
1037TestSurrogates() {
1038    static const uint8_t b[]={
1039        0xc3, 0x9f,             /*  00DF */
1040        0xed, 0x9f, 0xbf,       /*  D7FF */
1041        0xed, 0xa0, 0x81,       /*  D801 */
1042        0xed, 0xbf, 0xbe,       /*  DFFE */
1043        0xee, 0x80, 0x80,       /*  E000 */
1044        0xf0, 0x97, 0xbf, 0xbe  /* 17FFE */
1045    };
1046    static const UChar32 cp[]={
1047        0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe
1048    };
1049
1050    UChar32 cu, cs, cl;
1051    int32_t i, j, k, iu, is, il, length;
1052
1053    k=0; /* index into cp[] */
1054    length=LENGTHOF(b);
1055    for(i=0; i<length;) {
1056        j=i;
1057        U8_NEXT_UNSAFE(b, j, cu);
1058        iu=j;
1059
1060        j=i;
1061        U8_NEXT(b, j, length, cs);
1062        is=j;
1063
1064        j=i;
1065        L8_NEXT(b, j, length, cl);
1066        il=j;
1067
1068        if(cu!=cp[k]) {
1069            log_err("U8_NEXT_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
1070        }
1071
1072        /* U8_NEXT() returns <0 for surrogate code points */
1073        if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
1074            log_err("U8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
1075        }
1076
1077        /* L8_NEXT() returns surrogate code points like U8_NEXT_UNSAFE() */
1078        if(cl!=cu) {
1079            log_err("L8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
1080        }
1081
1082        if(is!=iu || il!=iu) {
1083            log_err("U8_NEXT(b[%ld]) or L8_NEXT(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
1084        }
1085
1086        ++k;    /* next code point */
1087        i=iu;   /* advance by one UTF-8 sequence */
1088    }
1089
1090    while(i>0) {
1091        --k; /* previous code point */
1092
1093        j=i;
1094        U8_PREV_UNSAFE(b, j, cu);
1095        iu=j;
1096
1097        j=i;
1098        U8_PREV(b, 0, j, cs);
1099        is=j;
1100
1101        j=i;
1102        L8_PREV(b, 0, j, cl);
1103        il=j;
1104
1105        if(cu!=cp[k]) {
1106            log_err("U8_PREV_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
1107        }
1108
1109        /* U8_PREV() returns <0 for surrogate code points */
1110        if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
1111            log_err("U8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
1112        }
1113
1114        /* L8_PREV() returns surrogate code points like U8_PREV_UNSAFE() */
1115        if(cl!=cu) {
1116            log_err("L8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
1117        }
1118
1119        if(is!=iu || il !=iu) {
1120            log_err("U8_PREV(b[%ld]) or L8_PREV(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
1121        }
1122
1123        i=iu;   /* go back by one UTF-8 sequence */
1124    }
1125}
1126
/*
 * Writes the first len bytes of uchars to the error log, each formatted as
 * "0x%02x " (two hex digits followed by a space). Nothing is written when
 * len is zero or negative.
 */
static void printUChars(const uint8_t *uchars, int16_t len){
    int16_t idx;

    for(idx=0; idx<len; ++idx){
        log_err("0x%02x ", uchars[idx]);
    }
}
1133