1/*
2******************************************************************************
3*
4*   Copyright (C) 2002-2014, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  custrtst.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2002oct09
14*   created by: Markus W. Scherer
15*
16*   Tests of ustring.h Unicode string API functions.
17*/
18
19#include "unicode/ustring.h"
20#include "unicode/ucnv.h"
21#include "unicode/uiter.h"
22#include "cintltst.h"
23#include <string.h>
24
/* number of elements in a true array (not valid for pointers) */
#define LENGTHOF(array) (sizeof(array)/sizeof(*(array)))
26
/*
 * Get the sign of an integer: 0 for zero, -1 for negative, 1 for positive.
 * The value is tested for zero as-is and its sign is taken after conversion
 * to int32_t. Explicit comparisons are used instead of (value>>31)|1 because
 * right-shifting a negative signed value is implementation-defined in C.
 * Note: the argument is evaluated more than once.
 */
#define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)<0 ? -1 : 1))
29
30/* test setup --------------------------------------------------------------- */
31
32static void setUpDataTable(void);
33static void TestStringCopy(void);
34static void TestStringFunctions(void);
35static void TestStringSearching(void);
36static void TestSurrogateSearching(void);
37static void TestUnescape(void);
38static void TestCountChar32(void);
39static void TestUCharIterator(void);
40
41void addUStringTest(TestNode** root);
42
43void addUStringTest(TestNode** root)
44{
45    addTest(root, &TestStringCopy, "tsutil/custrtst/TestStringCopy");
46    addTest(root, &TestStringFunctions, "tsutil/custrtst/TestStringFunctions");
47    addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching");
48    addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching");
49    addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape");
50    addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32");
51    addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator");
52}
53
54/* test data for TestStringFunctions ---------------------------------------- */
55
56UChar*** dataTable = NULL;
57
/*
 * Source strings for dataTable, indexed [row][column]:
 *   row 0 - first string, row 1 - second string, row 2 - concatenated string.
 * NOTE(review): raw[1][3] is spelled "Unites States" while raw[2][3] contains
 * "United States"; preserved verbatim - confirm whether this is intentional.
 */
static const char* raw[3][4] = {
    /* row 0: first string */
    {
        "English_",
        "French_",
        "Croatian_",
        "English_"
    },
    /* row 1: second string */
    {
        "United States",
        "France",
        "Croatia",
        "Unites States"
    },
    /* row 2: concatenated string */
    {
        "English_United States",
        "French_France",
        "Croatian_Croatia",
        "English_United States"
    }
};
68
69static void setUpDataTable()
70{
71    int32_t i,j;
72    if(dataTable == NULL) {
73        dataTable = (UChar***)calloc(sizeof(UChar**),3);
74
75            for (i = 0; i < 3; i++) {
76              dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
77                for (j = 0; j < 4; j++){
78                    dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
79                    u_uastrcpy(dataTable[i][j],raw[i][j]);
80                }
81            }
82    }
83}
84
85static void cleanUpDataTable()
86{
87    int32_t i,j;
88    if(dataTable != NULL) {
89        for (i=0; i<3; i++) {
90            for(j = 0; j<4; j++) {
91                free(dataTable[i][j]);
92            }
93            free(dataTable[i]);
94        }
95        free(dataTable);
96    }
97    dataTable = NULL;
98}
99
100/*Tests  for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
101static void TestStringFunctions()
102{
103    int32_t i,j,k;
104    UChar temp[512];
105    UChar nullTemp[512];
106    char test[512];
107    char tempOut[512];
108
109    setUpDataTable();
110
111    log_verbose("Testing u_strlen()\n");
112    if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
113        log_err("There is an error in u_strlen()");
114
115    log_verbose("Testing u_memcpy() and u_memcmp()\n");
116
117    for(i=0;i<3;++i)
118    {
119        for(j=0;j<4;++j)
120        {
121            log_verbose("Testing  %s\n", u_austrcpy(tempOut, dataTable[i][j]));
122            temp[0] = 0;
123            temp[7] = 0xA4; /* Mark the end */
124            u_memcpy(temp,dataTable[i][j], 7);
125
126            if(temp[7] != 0xA4)
127                log_err("an error occured in u_memcpy()\n");
128            if(u_memcmp(temp, dataTable[i][j], 7)!=0)
129                log_err("an error occured in u_memcpy() or u_memcmp()\n");
130        }
131    }
132    if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
133        log_err("an error occured in u_memcmp()\n");
134
135    log_verbose("Testing u_memset()\n");
136    nullTemp[0] = 0;
137    nullTemp[7] = 0;
138    u_memset(nullTemp, 0xa4, 7);
139    for (i = 0; i < 7; i++) {
140        if(nullTemp[i] != 0xa4) {
141            log_err("an error occured in u_memset()\n");
142        }
143    }
144    if(nullTemp[7] != 0) {
145        log_err("u_memset() went too far\n");
146    }
147
148    u_memset(nullTemp, 0, 7);
149    nullTemp[7] = 0xa4;
150    temp[7] = 0;
151    u_memcpy(temp,nullTemp, 7);
152    if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
153        log_err("an error occured in u_memcpy() or u_memcmp()\n");
154
155
156    log_verbose("Testing u_memmove()\n");
157    for (i = 0; i < 7; i++) {
158        temp[i] = (UChar)i;
159    }
160    u_memmove(temp + 1, temp, 7);
161    if(temp[0] != 0) {
162        log_err("an error occured in u_memmove()\n");
163    }
164    for (i = 1; i <= 7; i++) {
165        if(temp[i] != (i - 1)) {
166            log_err("an error occured in u_memmove()\n");
167        }
168    }
169
170    log_verbose("Testing u_strcpy() and u_strcmp()\n");
171
172    for(i=0;i<3;++i)
173    {
174        for(j=0;j<4;++j)
175        {
176            log_verbose("Testing  %s\n", u_austrcpy(tempOut, dataTable[i][j]));
177            temp[0] = 0;
178            u_strcpy(temp,dataTable[i][j]);
179
180            if(u_strcmp(temp,dataTable[i][j])!=0)
181                log_err("something threw an error in u_strcpy() or u_strcmp()\n");
182        }
183    }
184    if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
185        log_err("an error occured in u_memcmp()\n");
186
187    log_verbose("testing u_strcat()\n");
188    i=0;
189    for(j=0; j<2;++j)
190    {
191        u_uastrcpy(temp, "");
192        u_strcpy(temp,dataTable[i][j]);
193        u_strcat(temp,dataTable[i+1][j]);
194        if(u_strcmp(temp,dataTable[i+2][j])!=0)
195            log_err("something threw an error in u_strcat()\n");
196
197    }
198    log_verbose("Testing u_strncmp()\n");
199    for(i=0,j=0;j<4; ++j)
200    {
201        k=u_strlen(dataTable[i][j]);
202        if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
203            log_err("Something threw an error in u_strncmp\n");
204    }
205    if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
206        log_err("an error occured in u_memcmp()\n");
207
208
209    log_verbose("Testing u_strncat\n");
210    for(i=0,j=0;j<4; ++j)
211    {
212        k=u_strlen(dataTable[i][j]);
213
214        u_uastrcpy(temp,"");
215
216        if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
217            log_err("something threw an error in u_strncat or u_uastrcpy()\n");
218
219    }
220
221    log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
222    for(i=2,j=0;j<4; ++j)
223    {
224        k=u_strlen(dataTable[i][j]);
225        u_strncpy(temp, dataTable[i][j],k);
226        temp[k] = 0xa4;
227
228        if(u_strncmp(temp, dataTable[i][j],k)!=0)
229            log_err("something threw an error in u_strncpy()\n");
230
231        if(temp[k] != 0xa4)
232            log_err("something threw an error in u_strncpy()\n");
233
234        u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
235        u_uastrncpy(temp, raw[i][j], k-1);
236        if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
237            log_err("something threw an error in u_uastrncpy(k-1)\n");
238
239        if(temp[k-1] != 0x3F)
240            log_err("something threw an error in u_uastrncpy(k-1)\n");
241
242        u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
243        u_uastrncpy(temp, raw[i][j], k+1);
244        if(u_strcmp(temp, dataTable[i][j])!=0)
245            log_err("something threw an error in u_uastrncpy(k+1)\n");
246
247        if(temp[k] != 0)
248            log_err("something threw an error in u_uastrncpy(k+1)\n");
249
250        u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
251        u_uastrncpy(temp, raw[i][j], k);
252        if(u_strncmp(temp, dataTable[i][j], k)!=0)
253            log_err("something threw an error in u_uastrncpy(k)\n");
254
255        if(temp[k] != 0x3F)
256            log_err("something threw an error in u_uastrncpy(k)\n");
257    }
258
259    log_verbose("Testing u_strchr() and u_memchr()\n");
260
261    for(i=2,j=0;j<4;j++)
262    {
263        UChar saveVal = dataTable[i][j][0];
264        UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
265        int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
266
267        log_verbose("%s ", u_austrcpy(tempOut, findPtr));
268
269        if (findPtr == NULL || *findPtr != 0x005F) {
270            log_err("u_strchr can't find '_' in the string\n");
271        }
272
273        findPtr = u_strchr32(dataTable[i][j], 0x005F);
274        if (findPtr == NULL || *findPtr != 0x005F) {
275            log_err("u_strchr32 can't find '_' in the string\n");
276        }
277
278        findPtr = u_strchr(dataTable[i][j], 0);
279        if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
280            log_err("u_strchr can't find NULL in the string\n");
281        }
282
283        findPtr = u_strchr32(dataTable[i][j], 0);
284        if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
285            log_err("u_strchr32 can't find NULL in the string\n");
286        }
287
288        findPtr = u_memchr(dataTable[i][j], 0, dataSize);
289        if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
290            log_err("u_memchr can't find NULL in the string\n");
291        }
292
293        findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
294        if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
295            log_err("u_memchr32 can't find NULL in the string\n");
296        }
297
298        dataTable[i][j][0] = 0;
299        /* Make sure we skip over the NULL termination */
300        findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
301        if (findPtr == NULL || *findPtr != 0x005F) {
302            log_err("u_memchr can't find '_' in the string\n");
303        }
304
305        findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
306        if (findPtr == NULL || *findPtr != 0x005F) {
307            log_err("u_memchr32 can't find '_' in the string\n");
308        }
309        findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
310        if (findPtr != NULL) {
311            log_err("Should have found NULL when the character is not there.\n");
312        }
313        dataTable[i][j][0] = saveVal;   /* Put it back for the other tests */
314    }
315
316    /*
317     * test that u_strchr32()
318     * does not find surrogate code points when they are part of matched pairs
319     * (= part of supplementary code points)
320     * Jitterbug 1542
321     */
322    {
323        static const UChar s[]={
324            /*   0       1       2       3       4       5       6       7       8  9 */
325            0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
326        };
327
328        if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
329            log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
330        }
331        if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
332            log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
333        }
334    }
335
336    log_verbose("Testing u_austrcpy()");
337    u_austrcpy(test,dataTable[0][0]);
338    if(strcmp(test,raw[0][0])!=0)
339        log_err("There is an error in u_austrcpy()");
340
341
342    log_verbose("Testing u_strtok_r()");
343    {
344        const char tokString[] = "  ,  1 2 3  AHHHHH! 5.5 6 7    ,        8\n";
345        const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
346        UChar delimBuf[sizeof(test)];
347        UChar currTokenBuf[sizeof(tokString)];
348        UChar *state;
349        uint32_t currToken = 0;
350        UChar *ptr;
351
352        u_uastrcpy(temp, tokString);
353        u_uastrcpy(delimBuf, " ");
354
355        ptr = u_strtok_r(temp, delimBuf, &state);
356        u_uastrcpy(delimBuf, " ,");
357        while (ptr != NULL) {
358            u_uastrcpy(currTokenBuf, tokens[currToken]);
359            if (u_strcmp(ptr, currTokenBuf) != 0) {
360                log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
361            }
362            ptr = u_strtok_r(NULL, delimBuf, &state);
363            currToken++;
364        }
365
366        if (currToken != sizeof(tokens)/sizeof(tokens[0])) {
367            log_err("Didn't get correct number of tokens\n");
368        }
369        state = delimBuf;       /* Give it an "invalid" saveState */
370        u_uastrcpy(currTokenBuf, "");
371        if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
372            log_err("Didn't get NULL for empty string\n");
373        }
374        if (state != NULL) {
375            log_err("State should be NULL for empty string\n");
376        }
377        state = delimBuf;       /* Give it an "invalid" saveState */
378        u_uastrcpy(currTokenBuf, ", ,");
379        if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
380            log_err("Didn't get NULL for a string of delimiters\n");
381        }
382        if (state != NULL) {
383            log_err("State should be NULL for a string of delimiters\n");
384        }
385
386        state = delimBuf;       /* Give it an "invalid" saveState */
387        u_uastrcpy(currTokenBuf, "q, ,");
388        if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
389            log_err("Got NULL for a string that does not begin with delimiters\n");
390        }
391        if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
392            log_err("Didn't get NULL for a string that ends in delimiters\n");
393        }
394        if (state != NULL) {
395            log_err("State should be NULL for empty string\n");
396        }
397
398        state = delimBuf;       /* Give it an "invalid" saveState */
399        u_uastrcpy(currTokenBuf, tokString);
400        u_uastrcpy(temp, tokString);
401        u_uastrcpy(delimBuf, "q");  /* Give it a delimiter that it can't find. */
402        ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
403        if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
404            log_err("Should have recieved the same string when there are no delimiters\n");
405        }
406        if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
407            log_err("Should not have found another token in a one token string\n");
408        }
409    }
410
411    /* test u_strcmpCodePointOrder() */
412    {
413        /* these strings are in ascending order */
414        static const UChar strings[][4]={
415            { 0x61, 0 },                    /* U+0061 */
416            { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
417            { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
418            { 0xd800, 0 },                  /* U+d800 */
419            { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
420            { 0xdfff, 0 },                  /* U+dfff */
421            { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
422            { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
423            { 0xd800, 0xdc02, 0 },          /* U+10002 */
424            { 0xd84d, 0xdc56, 0 }           /* U+23456 */
425        };
426
427        UCharIterator iter1, iter2;
428        int32_t len1, len2, r1, r2;
429
430        for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
431            if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
432                log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
433            }
434            if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
435                log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
436            }
437
438            /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
439            if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
440                log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
441            }
442
443            /* test u_strCompare(TRUE) */
444            len1=u_strlen(strings[i]);
445            len2=u_strlen(strings[i+1]);
446            if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 ||
447                u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 ||
448                u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 ||
449                u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0
450            ) {
451                log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
452            }
453
454            /* test u_strCompare(FALSE) */
455            r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
456            r2=u_strcmp(strings[i], strings[i+1]);
457            if(_SIGN(r1)!=_SIGN(r2)) {
458                log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
459            }
460
461            /* test u_strCompareIter() */
462            uiter_setString(&iter1, strings[i], len1);
463            uiter_setString(&iter2, strings[i+1], len2);
464            if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
465                log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
466            }
467            r1=u_strCompareIter(&iter1, &iter2, FALSE);
468            if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
469                log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
470            }
471        }
472    }
473
474    cleanUpDataTable();
475}
476
477static void TestStringSearching()
478{
479    const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
480    const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
481    const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
482    const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
483    const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
484    const UChar surrMatchSet4[] = {0x0000};
485    const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
486    const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
487    const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0};   /* has partial surrogate */
488    const UChar
489        empty[] = { 0 },
490        a[] = { 0x61, 0 },
491        ab[] = { 0x61, 0x62, 0 },
492        ba[] = { 0x62, 0x61, 0 },
493        abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
494        cd[] = { 0x63, 0x64, 0 },
495        dc[] = { 0x64, 0x63, 0 },
496        cdh[] = { 0x63, 0x64, 0x68, 0 },
497        f[] = { 0x66, 0 },
498        fg[] = { 0x66, 0x67, 0 },
499        gf[] = { 0x67, 0x66, 0 };
500
501    log_verbose("Testing u_strpbrk()");
502
503    if (u_strpbrk(testString, a) != &testString[0]) {
504        log_err("u_strpbrk couldn't find first letter a.\n");
505    }
506    if (u_strpbrk(testString, dc) != &testString[2]) {
507        log_err("u_strpbrk couldn't find d or c.\n");
508    }
509    if (u_strpbrk(testString, cd) != &testString[2]) {
510        log_err("u_strpbrk couldn't find c or d.\n");
511    }
512    if (u_strpbrk(testString, cdh) != &testString[2]) {
513        log_err("u_strpbrk couldn't find c, d or h.\n");
514    }
515    if (u_strpbrk(testString, f) != NULL) {
516        log_err("u_strpbrk didn't return NULL for \"f\".\n");
517    }
518    if (u_strpbrk(testString, fg) != NULL) {
519        log_err("u_strpbrk didn't return NULL for \"fg\".\n");
520    }
521    if (u_strpbrk(testString, gf) != NULL) {
522        log_err("u_strpbrk didn't return NULL for \"gf\".\n");
523    }
524    if (u_strpbrk(testString, empty) != NULL) {
525        log_err("u_strpbrk didn't return NULL for \"\".\n");
526    }
527
528    log_verbose("Testing u_strpbrk() with surrogates");
529
530    if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
531        log_err("u_strpbrk couldn't find first letter a.\n");
532    }
533    if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
534        log_err("u_strpbrk couldn't find d or c.\n");
535    }
536    if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
537        log_err("u_strpbrk couldn't find c or d.\n");
538    }
539    if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
540        log_err("u_strpbrk couldn't find c, d or h.\n");
541    }
542    if (u_strpbrk(testSurrogateString, f) != NULL) {
543        log_err("u_strpbrk didn't return NULL for \"f\".\n");
544    }
545    if (u_strpbrk(testSurrogateString, fg) != NULL) {
546        log_err("u_strpbrk didn't return NULL for \"fg\".\n");
547    }
548    if (u_strpbrk(testSurrogateString, gf) != NULL) {
549        log_err("u_strpbrk didn't return NULL for \"gf\".\n");
550    }
551    if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
552        log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
553    }
554    if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
555        log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
556    }
557    if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
558        log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
559    }
560    if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
561        log_err("u_strpbrk should have returned NULL for empty string.\n");
562    }
563    if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
564        log_err("u_strpbrk should have found bad surrogate.\n");
565    }
566
567    log_verbose("Testing u_strcspn()");
568
569    if (u_strcspn(testString, a) != 0) {
570        log_err("u_strcspn couldn't find first letter a.\n");
571    }
572    if (u_strcspn(testString, dc) != 2) {
573        log_err("u_strcspn couldn't find d or c.\n");
574    }
575    if (u_strcspn(testString, cd) != 2) {
576        log_err("u_strcspn couldn't find c or d.\n");
577    }
578    if (u_strcspn(testString, cdh) != 2) {
579        log_err("u_strcspn couldn't find c, d or h.\n");
580    }
581    if (u_strcspn(testString, f) != u_strlen(testString)) {
582        log_err("u_strcspn didn't return NULL for \"f\".\n");
583    }
584    if (u_strcspn(testString, fg) != u_strlen(testString)) {
585        log_err("u_strcspn didn't return NULL for \"fg\".\n");
586    }
587    if (u_strcspn(testString, gf) != u_strlen(testString)) {
588        log_err("u_strcspn didn't return NULL for \"gf\".\n");
589    }
590
591    log_verbose("Testing u_strcspn() with surrogates");
592
593    if (u_strcspn(testSurrogateString, a) != 1) {
594        log_err("u_strcspn couldn't find first letter a.\n");
595    }
596    if (u_strcspn(testSurrogateString, dc) != 5) {
597        log_err("u_strcspn couldn't find d or c.\n");
598    }
599    if (u_strcspn(testSurrogateString, cd) != 5) {
600        log_err("u_strcspn couldn't find c or d.\n");
601    }
602    if (u_strcspn(testSurrogateString, cdh) != 5) {
603        log_err("u_strcspn couldn't find c, d or h.\n");
604    }
605    if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
606        log_err("u_strcspn didn't return NULL for \"f\".\n");
607    }
608    if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
609        log_err("u_strcspn didn't return NULL for \"fg\".\n");
610    }
611    if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
612        log_err("u_strcspn didn't return NULL for \"gf\".\n");
613    }
614    if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
615        log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
616    }
617    if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
618        log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
619    }
620    if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
621        log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
622    }
623    if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
624        log_err("u_strcspn should have returned strlen for empty string.\n");
625    }
626
627
628    log_verbose("Testing u_strspn()");
629
630    if (u_strspn(testString, a) != 1) {
631        log_err("u_strspn couldn't skip first letter a.\n");
632    }
633    if (u_strspn(testString, ab) != 2) {
634        log_err("u_strspn couldn't skip a or b.\n");
635    }
636    if (u_strspn(testString, ba) != 2) {
637        log_err("u_strspn couldn't skip a or b.\n");
638    }
639    if (u_strspn(testString, f) != 0) {
640        log_err("u_strspn didn't return 0 for \"f\".\n");
641    }
642    if (u_strspn(testString, dc) != 0) {
643        log_err("u_strspn couldn't find first letter a (skip d or c).\n");
644    }
645    if (u_strspn(testString, abcd) != u_strlen(testString)) {
646        log_err("u_strspn couldn't skip over the whole string.\n");
647    }
648    if (u_strspn(testString, empty) != 0) {
649        log_err("u_strspn should have returned 0 for empty string.\n");
650    }
651
652    log_verbose("Testing u_strspn() with surrogates");
653    if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
654        log_err("u_strspn couldn't skip 0xdbff or a.\n");
655    }
656    if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
657        log_err("u_strspn couldn't skip 0xdbff or a.\n");
658    }
659    if (u_strspn(testSurrogateString, f) != 0) {
660        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
661    }
662    if (u_strspn(testSurrogateString, dc) != 0) {
663        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
664    }
665    if (u_strspn(testSurrogateString, cd) != 0) {
666        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
667    }
668    if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
669        log_err("u_strspn couldn't skip whole string.\n");
670    }
671    if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
672        log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
673    }
674    if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
675        log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
676    }
677    if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
678        log_err("u_strspn should have returned 0 for empty string.\n");
679    }
680}
681
682/*
683 * All binary Unicode string searches should behave the same for equivalent input.
684 * See Jitterbug 2145.
685 * There are some new functions, too - just test them all.
686 */
687static void
688TestSurrogateSearching() {
689    static const UChar s[]={
690        /* 0       1       2     3       4     5       6     7       8       9    10 11 */
691        0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
692    }, sub_a[]={
693        0x61, 0
694    }, sub_b[]={
695        0x62, 0
696    }, sub_lead[]={
697        0xd801, 0
698    }, sub_trail[]={
699        0xdc02, 0
700    }, sub_supp[]={
701        0xd801, 0xdc02, 0
702    }, sub_supp2[]={
703        0xd801, 0xdc03, 0
704    }, sub_a_lead[]={
705        0x61, 0xd801, 0
706    }, sub_trail_a[]={
707        0xdc02, 0x61, 0
708    }, sub_aba[]={
709        0x61, 0x62, 0x61, 0
710    };
711    static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
712    static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
713
714    const UChar *first, *last;
715
716    /* search for NUL code point: find end of string */
717    first=s+u_strlen(s);
718
719    if(
720        first!=u_strchr(s, nul) ||
721        first!=u_strchr32(s, nul) ||
722        first!=u_memchr(s, nul, LENGTHOF(s)) ||
723        first!=u_memchr32(s, nul, LENGTHOF(s)) ||
724        first!=u_strrchr(s, nul) ||
725        first!=u_strrchr32(s, nul) ||
726        first!=u_memrchr(s, nul, LENGTHOF(s)) ||
727        first!=u_memrchr32(s, nul, LENGTHOF(s))
728    ) {
729        log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
730    }
731
732    /* search for empty substring: find beginning of string */
733    if(
734        s!=u_strstr(s, &nul) ||
735        s!=u_strFindFirst(s, -1, &nul, -1) ||
736        s!=u_strFindFirst(s, -1, &nul, 0) ||
737        s!=u_strFindFirst(s, LENGTHOF(s), &nul, -1) ||
738        s!=u_strFindFirst(s, LENGTHOF(s), &nul, 0) ||
739        s!=u_strrstr(s, &nul) ||
740        s!=u_strFindLast(s, -1, &nul, -1) ||
741        s!=u_strFindLast(s, -1, &nul, 0) ||
742        s!=u_strFindLast(s, LENGTHOF(s), &nul, -1) ||
743        s!=u_strFindLast(s, LENGTHOF(s), &nul, 0)
744    ) {
745        log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
746    }
747
748    /* find 'a' in s[1..10[ */
749    first=s+3;
750    last=s+7;
751    if(
752        first!=u_strchr(s+1, a) ||
753        first!=u_strchr32(s+1, a) ||
754        first!=u_memchr(s+1, a, 9) ||
755        first!=u_memchr32(s+1, a, 9) ||
756        first!=u_strstr(s+1, sub_a) ||
757        first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
758        first!=u_strFindFirst(s+1, -1, &a, 1) ||
759        first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
760        first!=u_strFindFirst(s+1, 9, &a, 1) ||
761        (s+10)!=u_strrchr(s+1, a) ||
762        (s+10)!=u_strrchr32(s+1, a) ||
763        last!=u_memrchr(s+1, a, 9) ||
764        last!=u_memrchr32(s+1, a, 9) ||
765        (s+10)!=u_strrstr(s+1, sub_a) ||
766        (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
767        (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
768        last!=u_strFindLast(s+1, 9, sub_a, -1) ||
769        last!=u_strFindLast(s+1, 9, &a, 1)
770    ) {
771        log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
772    }
773
774    /* do not find 'b' in s[1..10[ */
775    if(
776        NULL!=u_strchr(s+1, b) ||
777        NULL!=u_strchr32(s+1, b) ||
778        NULL!=u_memchr(s+1, b, 9) ||
779        NULL!=u_memchr32(s+1, b, 9) ||
780        NULL!=u_strstr(s+1, sub_b) ||
781        NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
782        NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
783        NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
784        NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
785        NULL!=u_strrchr(s+1, b) ||
786        NULL!=u_strrchr32(s+1, b) ||
787        NULL!=u_memrchr(s+1, b, 9) ||
788        NULL!=u_memrchr32(s+1, b, 9) ||
789        NULL!=u_strrstr(s+1, sub_b) ||
790        NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
791        NULL!=u_strFindLast(s+1, -1, &b, 1) ||
792        NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
793        NULL!=u_strFindLast(s+1, 9, &b, 1)
794    ) {
795        log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
796    }
797
798    /* do not find a non-code point in s[1..10[ */
799    if(
800        NULL!=u_strchr32(s+1, ill) ||
801        NULL!=u_memchr32(s+1, ill, 9) ||
802        NULL!=u_strrchr32(s+1, ill) ||
803        NULL!=u_memrchr32(s+1, ill, 9)
804    ) {
805        log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
806    }
807
808    /* find U+d801 in s[1..10[ */
809    first=s+6;
810    if(
811        first!=u_strchr(s+1, lead) ||
812        first!=u_strchr32(s+1, lead) ||
813        first!=u_memchr(s+1, lead, 9) ||
814        first!=u_memchr32(s+1, lead, 9) ||
815        first!=u_strstr(s+1, sub_lead) ||
816        first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
817        first!=u_strFindFirst(s+1, -1, &lead, 1) ||
818        first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
819        first!=u_strFindFirst(s+1, 9, &lead, 1) ||
820        first!=u_strrchr(s+1, lead) ||
821        first!=u_strrchr32(s+1, lead) ||
822        first!=u_memrchr(s+1, lead, 9) ||
823        first!=u_memrchr32(s+1, lead, 9) ||
824        first!=u_strrstr(s+1, sub_lead) ||
825        first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
826        first!=u_strFindLast(s+1, -1, &lead, 1) ||
827        first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
828        first!=u_strFindLast(s+1, 9, &lead, 1)
829    ) {
830        log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
831    }
832
833    /* find U+dc02 in s[1..10[ */
834    first=s+4;
835    if(
836        first!=u_strchr(s+1, trail) ||
837        first!=u_strchr32(s+1, trail) ||
838        first!=u_memchr(s+1, trail, 9) ||
839        first!=u_memchr32(s+1, trail, 9) ||
840        first!=u_strstr(s+1, sub_trail) ||
841        first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
842        first!=u_strFindFirst(s+1, -1, &trail, 1) ||
843        first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
844        first!=u_strFindFirst(s+1, 9, &trail, 1) ||
845        first!=u_strrchr(s+1, trail) ||
846        first!=u_strrchr32(s+1, trail) ||
847        first!=u_memrchr(s+1, trail, 9) ||
848        first!=u_memrchr32(s+1, trail, 9) ||
849        first!=u_strrstr(s+1, sub_trail) ||
850        first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
851        first!=u_strFindLast(s+1, -1, &trail, 1) ||
852        first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
853        first!=u_strFindLast(s+1, 9, &trail, 1)
854    ) {
855        log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
856    }
857
858    /* find U+10402 in s[1..10[ */
859    first=s+1;
860    last=s+8;
861    if(
862        first!=u_strchr32(s+1, supp) ||
863        first!=u_memchr32(s+1, supp, 9) ||
864        first!=u_strstr(s+1, sub_supp) ||
865        first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
866        first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
867        first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
868        first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
869        last!=u_strrchr32(s+1, supp) ||
870        last!=u_memrchr32(s+1, supp, 9) ||
871        last!=u_strrstr(s+1, sub_supp) ||
872        last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
873        last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
874        last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
875        last!=u_strFindLast(s+1, 9, sub_supp, 2)
876    ) {
877        log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
878    }
879
880    /* do not find U+10402 in a single UChar */
881    if(
882        NULL!=u_memchr32(s+1, supp, 1) ||
883        NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
884        NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
885        NULL!=u_memrchr32(s+1, supp, 1) ||
886        NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
887        NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
888        NULL!=u_memrchr32(s+2, supp, 1) ||
889        NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
890        NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
891    ) {
892        log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
893    }
894
895    /* do not find U+10403 in s[1..10[ */
896    if(
897        NULL!=u_strchr32(s+1, supp2) ||
898        NULL!=u_memchr32(s+1, supp2, 9) ||
899        NULL!=u_strstr(s+1, sub_supp2) ||
900        NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
901        NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
902        NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
903        NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
904        NULL!=u_strrchr32(s+1, supp2) ||
905        NULL!=u_memrchr32(s+1, supp2, 9) ||
906        NULL!=u_strrstr(s+1, sub_supp2) ||
907        NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
908        NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
909        NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
910        NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
911    ) {
912        log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
913    }
914
915    /* find <0061 d801> in s[1..10[ */
916    first=s+5;
917    if(
918        first!=u_strstr(s+1, sub_a_lead) ||
919        first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
920        first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
921        first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
922        first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
923        first!=u_strrstr(s+1, sub_a_lead) ||
924        first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
925        first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
926        first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
927        first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
928    ) {
929        log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
930    }
931
932    /* find <dc02 0061> in s[1..10[ */
933    first=s+4;
934    if(
935        first!=u_strstr(s+1, sub_trail_a) ||
936        first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
937        first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
938        first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
939        first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
940        first!=u_strrstr(s+1, sub_trail_a) ||
941        first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
942        first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
943        first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
944        first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
945    ) {
946        log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
947    }
948
949    /* do not find "aba" in s[1..10[ */
950    if(
951        NULL!=u_strstr(s+1, sub_aba) ||
952        NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
953        NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
954        NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
955        NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
956        NULL!=u_strrstr(s+1, sub_aba) ||
957        NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
958        NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
959        NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
960        NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
961    ) {
962        log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
963    }
964}
965
966static void TestStringCopy()
967{
968    UChar temp[40];
969    UChar *result=0;
970    UChar subString[5];
971    UChar uchars[]={0x61, 0x62, 0x63, 0x00};
972    char  charOut[40];
973    char  chars[]="abc";    /* needs default codepage */
974
975    log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
976
977    u_uastrcpy(temp, "abc");
978    if(u_strcmp(temp, uchars) != 0) {
979        log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
980    }
981
982    temp[0] = 0xFB; /* load garbage into it */
983    temp[1] = 0xFB;
984    temp[2] = 0xFB;
985    temp[3] = 0xFB;
986
987    u_uastrncpy(temp, "abcabcabc", 3);
988    if(u_strncmp(uchars, temp, 3) != 0){
989        log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
990    }
991    if(temp[3] != 0xFB) {
992        log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
993    }
994
995    charOut[0] = (char)0x7B; /* load garbage into it */
996    charOut[1] = (char)0x7B;
997    charOut[2] = (char)0x7B;
998    charOut[3] = (char)0x7B;
999
1000    temp[0] = 0x0061;
1001    temp[1] = 0x0062;
1002    temp[2] = 0x0063;
1003    temp[3] = 0x0061;
1004    temp[4] = 0x0062;
1005    temp[5] = 0x0063;
1006    temp[6] = 0x0000;
1007
1008    u_austrncpy(charOut, temp, 3);
1009    if(strncmp(chars, charOut, 3) != 0){
1010        log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1011    }
1012    if(charOut[3] != (char)0x7B) {
1013        log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1014    }
1015
1016    /*Testing u_strchr()*/
1017    log_verbose("Testing u_strchr\n");
1018    temp[0]=0x42;
1019    temp[1]=0x62;
1020    temp[2]=0x62;
1021    temp[3]=0x63;
1022    temp[4]=0xd841;
1023    temp[5]=0xd841;
1024    temp[6]=0xdc02;
1025    temp[7]=0;
1026    result=u_strchr(temp, (UChar)0x62);
1027    if(result != temp+1){
1028        log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1029    }
1030    /*Testing u_strstr()*/
1031    log_verbose("Testing u_strstr\n");
1032    subString[0]=0x62;
1033    subString[1]=0x63;
1034    subString[2]=0;
1035    result=u_strstr(temp, subString);
1036    if(result != temp+2){
1037        log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1038    }
1039    result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1040    if(result != temp){
1041        log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1042    }
1043    result=u_strstr(subString, temp);
1044    if(result != NULL){
1045        log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1046    }
1047
1048    /*Testing u_strchr32*/
1049    log_verbose("Testing u_strchr32\n");
1050    result=u_strchr32(temp, (UChar32)0x62);
1051    if(result != temp+1){
1052        log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1053    }
1054    result=u_strchr32(temp, (UChar32)0xfb);
1055    if(result != NULL){
1056        log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1057    }
1058    result=u_strchr32(temp, (UChar32)0x20402);
1059    if(result != temp+5){
1060        log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1061    }
1062
1063    temp[7]=0xfc00;
1064    result=u_memchr32(temp, (UChar32)0x20402, 7);
1065    if(result != temp+5){
1066        log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1067    }
1068    result=u_memchr32(temp, (UChar32)0x20402, 6);
1069    if(result != NULL){
1070        log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1071    }
1072    result=u_memchr32(temp, (UChar32)0x20402, 1);
1073    if(result != NULL){
1074        log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1075    }
1076    result=u_memchr32(temp, (UChar32)0xfc00, 8);
1077    if(result != temp+7){
1078        log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1079    }
1080}
1081
1082/* test u_unescape() and u_unescapeAt() ------------------------------------- */
1083
1084static void
1085TestUnescape() {
1086    static UChar buffer[200];
1087
1088    static const char* input =
1089        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1090
1091    static const UChar expect[]={
1092        0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1093        0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1094        0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1095        0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1096    };
1097    static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1;
1098    int32_t length;
1099
1100    /* test u_unescape() */
1101    length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0]));
1102    if(length!=explength || u_strcmp(buffer, expect)!=0) {
1103        log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1104                explength);
1105    }
1106
1107    /* try preflighting */
1108    length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0]));
1109    if(length!=explength || u_strcmp(buffer, expect)!=0) {
1110        log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1111    }
1112
1113    /* ### TODO: test u_unescapeAt() */
1114}
1115
1116/* test code point counting functions --------------------------------------- */
1117
1118/* reference implementation of u_strHasMoreChar32Than() */
1119static int32_t
1120_refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1121    int32_t count=u_countChar32(s, length);
1122    return count>number;
1123}
1124
1125/* compare the real function against the reference */
1126static void
1127_testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1128    if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1129        log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1130                i, length, number, u_strHasMoreChar32Than(s, length, number));
1131    }
1132}
1133
1134static void
1135TestCountChar32() {
1136    static const UChar string[]={
1137        0x61, 0x62, 0xd800, 0xdc00,
1138        0xd801, 0xdc01, 0x63, 0xd802,
1139        0x64, 0xdc03, 0x65, 0x66,
1140        0xd804, 0xdc04, 0xd805, 0xdc05,
1141        0x67
1142    };
1143    UChar buffer[100];
1144    int32_t i, length, number;
1145
1146    /* test u_strHasMoreChar32Than() with length>=0 */
1147    length=LENGTHOF(string);
1148    while(length>=0) {
1149        for(i=0; i<=length; ++i) {
1150            for(number=-1; number<=((length-i)+2); ++number) {
1151                _testStrHasMoreChar32Than(string+i, i, length-i, number);
1152            }
1153        }
1154        --length;
1155    }
1156
1157    /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1158    length=LENGTHOF(string);
1159    u_memcpy(buffer, string, length);
1160    while(length>=0) {
1161        buffer[length]=0;
1162        for(i=0; i<=length; ++i) {
1163            for(number=-1; number<=((length-i)+2); ++number) {
1164                _testStrHasMoreChar32Than(string+i, i, -1, number);
1165            }
1166        }
1167        --length;
1168    }
1169
1170    /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1171    for(length=-1; length<=1; ++length) {
1172        for(i=0; i<=length; ++i) {
1173            for(number=-2; number<=2; ++number) {
1174                _testStrHasMoreChar32Than(NULL, 0, length, number);
1175            }
1176        }
1177    }
1178}
1179
1180/* UCharIterator ------------------------------------------------------------ */
1181
1182/*
1183 * Compare results from two iterators, should be same.
1184 * Assume that the text is not empty and that
1185 * iteration start==0 and iteration limit==length.
1186 */
1187static void
1188compareIterators(UCharIterator *iter1, const char *n1,
1189                 UCharIterator *iter2, const char *n2) {
1190    int32_t i, pos1, pos2, middle, length;
1191    UChar32 c1, c2;
1192
1193    /* compare lengths */
1194    length=iter1->getIndex(iter1, UITER_LENGTH);
1195    pos2=iter2->getIndex(iter2, UITER_LENGTH);
1196    if(length!=pos2) {
1197        log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1198        return;
1199    }
1200
1201    /* set into the middle */
1202    middle=length/2;
1203
1204    pos1=iter1->move(iter1, middle, UITER_ZERO);
1205    if(pos1!=middle) {
1206        log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1207        return;
1208    }
1209
1210    pos2=iter2->move(iter2, middle, UITER_ZERO);
1211    if(pos2!=middle) {
1212        log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1213        return;
1214    }
1215
1216    /* test current() */
1217    c1=iter1->current(iter1);
1218    c2=iter2->current(iter2);
1219    if(c1!=c2) {
1220        log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1221        return;
1222    }
1223
1224    /* move forward 3 UChars */
1225    for(i=0; i<3; ++i) {
1226        c1=iter1->next(iter1);
1227        c2=iter2->next(iter2);
1228        if(c1!=c2) {
1229            log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1230            return;
1231        }
1232    }
1233
1234    /* move backward 5 UChars */
1235    for(i=0; i<5; ++i) {
1236        c1=iter1->previous(iter1);
1237        c2=iter2->previous(iter2);
1238        if(c1!=c2) {
1239            log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1240            return;
1241        }
1242    }
1243
1244    /* iterate forward from the beginning */
1245    pos1=iter1->move(iter1, 0, UITER_START);
1246    if(pos1<0) {
1247        log_err("%s->move(start) failed\n", n1);
1248        return;
1249    }
1250    if(!iter1->hasNext(iter1)) {
1251        log_err("%s->hasNext() at the start returns FALSE\n", n1);
1252        return;
1253    }
1254
1255    pos2=iter2->move(iter2, 0, UITER_START);
1256    if(pos2<0) {
1257        log_err("%s->move(start) failed\n", n2);
1258        return;
1259    }
1260    if(!iter2->hasNext(iter2)) {
1261        log_err("%s->hasNext() at the start returns FALSE\n", n2);
1262        return;
1263    }
1264
1265    do {
1266        c1=iter1->next(iter1);
1267        c2=iter2->next(iter2);
1268        if(c1!=c2) {
1269            log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1270            return;
1271        }
1272    } while(c1>=0);
1273
1274    if(iter1->hasNext(iter1)) {
1275        log_err("%s->hasNext() at the end returns TRUE\n", n1);
1276        return;
1277    }
1278    if(iter2->hasNext(iter2)) {
1279        log_err("%s->hasNext() at the end returns TRUE\n", n2);
1280        return;
1281    }
1282
1283    /* back to the middle */
1284    pos1=iter1->move(iter1, middle, UITER_ZERO);
1285    if(pos1!=middle) {
1286        log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1287        return;
1288    }
1289
1290    pos2=iter2->move(iter2, middle, UITER_ZERO);
1291    if(pos2!=middle) {
1292        log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1293        return;
1294    }
1295
1296    /* move to index 1 */
1297    pos1=iter1->move(iter1, 1, UITER_ZERO);
1298    if(pos1!=1) {
1299        log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1300        return;
1301    }
1302
1303    pos2=iter2->move(iter2, 1, UITER_ZERO);
1304    if(pos2!=1) {
1305        log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1306        return;
1307    }
1308
1309    /* iterate backward from the end */
1310    pos1=iter1->move(iter1, 0, UITER_LIMIT);
1311    if(pos1<0) {
1312        log_err("%s->move(limit) failed\n", n1);
1313        return;
1314    }
1315    if(!iter1->hasPrevious(iter1)) {
1316        log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1317        return;
1318    }
1319
1320    pos2=iter2->move(iter2, 0, UITER_LIMIT);
1321    if(pos2<0) {
1322        log_err("%s->move(limit) failed\n", n2);
1323        return;
1324    }
1325    if(!iter2->hasPrevious(iter2)) {
1326        log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1327        return;
1328    }
1329
1330    do {
1331        c1=iter1->previous(iter1);
1332        c2=iter2->previous(iter2);
1333        if(c1!=c2) {
1334            log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1335            return;
1336        }
1337    } while(c1>=0);
1338
1339    if(iter1->hasPrevious(iter1)) {
1340        log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1341        return;
1342    }
1343    if(iter2->hasPrevious(iter2)) {
1344        log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1345        return;
1346    }
1347}
1348
1349/*
1350 * Test the iterator's getState() and setState() functions.
1351 * iter1 and iter2 must be set up for the same iterator type and the same string
1352 * but may be physically different structs (different addresses).
1353 *
1354 * Assume that the text is not empty and that
1355 * iteration start==0 and iteration limit==length.
1356 * It must be 2<=middle<=length-2.
1357 */
1358static void
1359testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1360    UChar32 u[4];
1361
1362    UErrorCode errorCode;
1363    UChar32 c;
1364    uint32_t state;
1365    int32_t i, j;
1366
1367    /* get four UChars from the middle of the string */
1368    iter1->move(iter1, middle-2, UITER_ZERO);
1369    for(i=0; i<4; ++i) {
1370        c=iter1->next(iter1);
1371        if(c<0) {
1372            /* the test violates the assumptions, see comment above */
1373            log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1374            return;
1375        }
1376        u[i]=c;
1377    }
1378
1379    /* move to the middle and get the state */
1380    iter1->move(iter1, -2, UITER_CURRENT);
1381    state=uiter_getState(iter1);
1382
1383    /* set the state into the second iterator and compare the results */
1384    errorCode=U_ZERO_ERROR;
1385    uiter_setState(iter2, state, &errorCode);
1386    if(U_FAILURE(errorCode)) {
1387        log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1388        return;
1389    }
1390
1391    c=iter2->current(iter2);
1392    if(c!=u[2]) {
1393        log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1394    }
1395
1396    c=iter2->previous(iter2);
1397    if(c!=u[1]) {
1398        log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1399    }
1400
1401    iter2->move(iter2, 2, UITER_CURRENT);
1402    c=iter2->next(iter2);
1403    if(c!=u[3]) {
1404        log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1405    }
1406
1407    iter2->move(iter2, -3, UITER_CURRENT);
1408    c=iter2->previous(iter2);
1409    if(c!=u[0]) {
1410        log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1411    }
1412
1413    /* move the second iterator back to the middle */
1414    iter2->move(iter2, 1, UITER_CURRENT);
1415    iter2->next(iter2);
1416
1417    /* check that both are in the middle */
1418    i=iter1->getIndex(iter1, UITER_CURRENT);
1419    j=iter2->getIndex(iter2, UITER_CURRENT);
1420    if(i!=middle) {
1421        log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1422    }
1423    if(i!=j) {
1424        log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1425    }
1426
1427    /* compare lengths */
1428    i=iter1->getIndex(iter1, UITER_LENGTH);
1429    j=iter2->getIndex(iter2, UITER_LENGTH);
1430    if(i!=j) {
1431        log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1432    }
1433}
1434
1435static void
1436TestUCharIterator() {
1437    static const UChar text[]={
1438        0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1439    };
1440    char bytes[40];
1441
1442    UCharIterator iter, iter1, iter2;
1443    UConverter *cnv;
1444    UErrorCode errorCode;
1445    int32_t length;
1446
1447    /* simple API/code coverage - test NOOP UCharIterator */
1448    uiter_setString(&iter, NULL, 0);
1449    if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1450        iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1451        iter.hasNext(&iter) || iter.hasPrevious(&iter)
1452    ) {
1453        log_err("NOOP UCharIterator behaves unexpectedly\n");
1454    }
1455
1456    /* test get/set state */
1457    length=LENGTHOF(text)-1;
1458    uiter_setString(&iter1, text, -1);
1459    uiter_setString(&iter2, text, length);
1460    testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1461    testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1462
1463    /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1464    errorCode=U_ZERO_ERROR;
1465    u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1466    if(U_FAILURE(errorCode)) {
1467        log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1468        return;
1469    }
1470
1471    uiter_setString(&iter1, text, -1);
1472    uiter_setUTF8(&iter2, bytes, length);
1473    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1474
1475    /* try again with length=-1 */
1476    uiter_setUTF8(&iter2, bytes, -1);
1477    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1478
1479    /* test get/set state */
1480    length=LENGTHOF(text)-1;
1481    uiter_setUTF8(&iter1, bytes, -1);
1482    testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1483    testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1484
1485    /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1486    errorCode=U_ZERO_ERROR;
1487    cnv=ucnv_open("UTF-16BE", &errorCode);
1488    length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1489    ucnv_close(cnv);
1490    if(U_FAILURE(errorCode)) {
1491        log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1492        return;
1493    }
1494
1495    /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1496    bytes[length]=bytes[length+1]=0;
1497
1498    uiter_setString(&iter1, text, -1);
1499    uiter_setUTF16BE(&iter2, bytes, length);
1500    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1501
1502    /* try again with length=-1 */
1503    uiter_setUTF16BE(&iter2, bytes, -1);
1504    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1505
1506    /* try again after moving the bytes up one, and with length=-1 */
1507    memmove(bytes+1, bytes, length+2);
1508    uiter_setUTF16BE(&iter2, bytes+1, -1);
1509    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1510
1511    /* ### TODO test other iterators: CharacterIterator, Replaceable */
1512}
1513