1/*
2******************************************************************************
3*
4*   Copyright (C) 2002-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  custrtst.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2002oct09
14*   created by: Markus W. Scherer
15*
16*   Tests of ustring.h Unicode string API functions.
17*/
18
19#include "unicode/ustring.h"
20#include "unicode/ucnv.h"
21#include "unicode/uiter.h"
22#include "cintltst.h"
23#include <string.h>
24
/* number of elements of a real array (not usable on pointers or array parameters) */
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))

/*
 * get the sign of an integer: yields -1, 0 or +1
 * The value is narrowed to int32_t and compared against 0 instead of being
 * right-shifted, because right-shifting a negative signed value is
 * implementation-defined in C.
 * Note: the argument is evaluated more than once.
 */
#define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)<0 ? -1 : 1))
29
30/* test setup --------------------------------------------------------------- */
31
32static void setUpDataTable(void);
33static void TestStringCopy(void);
34static void TestStringFunctions(void);
35static void TestStringSearching(void);
36static void TestSurrogateSearching(void);
37static void TestUnescape(void);
38static void TestCountChar32(void);
39static void TestUCharIterator(void);
40static void TestUNormIterator(void);
41static void TestBadUNormIterator(void);
42
43void addUStringTest(TestNode** root);
44
45void addUStringTest(TestNode** root)
46{
47    addTest(root, &TestStringCopy, "tsutil/custrtst/TestStringCopy");
48    addTest(root, &TestStringFunctions, "tsutil/custrtst/TestStringFunctions");
49    addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching");
50    addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching");
51    addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape");
52    addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32");
53    addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator");
54    addTest(root, &TestUNormIterator, "tsutil/custrtst/TestUNormIterator");
55    addTest(root, &TestBadUNormIterator, "tsutil/custrtst/TestBadUNormIterator");
56}
57
58/* test data for TestStringFunctions ---------------------------------------- */
59
60UChar*** dataTable = NULL;
61
62static const char* raw[3][4] = {
63
64    /* First String */
65    {   "English_",  "French_",   "Croatian_", "English_"},
66    /* Second String */
67    {   "United States",    "France",   "Croatia",  "Unites States"},
68
69   /* Concatenated string */
70    {   "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
71};
72
73static void setUpDataTable()
74{
75    int32_t i,j;
76    if(dataTable == NULL) {
77        dataTable = (UChar***)calloc(sizeof(UChar**),3);
78
79            for (i = 0; i < 3; i++) {
80              dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
81                for (j = 0; j < 4; j++){
82                    dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
83                    u_uastrcpy(dataTable[i][j],raw[i][j]);
84                }
85            }
86    }
87}
88
89static void cleanUpDataTable()
90{
91    int32_t i,j;
92    if(dataTable != NULL) {
93        for (i=0; i<3; i++) {
94            for(j = 0; j<4; j++) {
95                free(dataTable[i][j]);
96            }
97            free(dataTable[i]);
98        }
99        free(dataTable);
100    }
101    dataTable = NULL;
102}
103
104/*Tests  for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
105static void TestStringFunctions()
106{
107    int32_t i,j,k;
108    UChar temp[512];
109    UChar nullTemp[512];
110    char test[512];
111    char tempOut[512];
112
113    setUpDataTable();
114
115    log_verbose("Testing u_strlen()\n");
116    if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
117        log_err("There is an error in u_strlen()");
118
119    log_verbose("Testing u_memcpy() and u_memcmp()\n");
120
121    for(i=0;i<3;++i)
122    {
123        for(j=0;j<4;++j)
124        {
125            log_verbose("Testing  %s\n", u_austrcpy(tempOut, dataTable[i][j]));
126            temp[0] = 0;
127            temp[7] = 0xA4; /* Mark the end */
128            u_memcpy(temp,dataTable[i][j], 7);
129
130            if(temp[7] != 0xA4)
131                log_err("an error occured in u_memcpy()\n");
132            if(u_memcmp(temp, dataTable[i][j], 7)!=0)
133                log_err("an error occured in u_memcpy() or u_memcmp()\n");
134        }
135    }
136    if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
137        log_err("an error occured in u_memcmp()\n");
138
139    log_verbose("Testing u_memset()\n");
140    nullTemp[0] = 0;
141    nullTemp[7] = 0;
142    u_memset(nullTemp, 0xa4, 7);
143    for (i = 0; i < 7; i++) {
144        if(nullTemp[i] != 0xa4) {
145            log_err("an error occured in u_memset()\n");
146        }
147    }
148    if(nullTemp[7] != 0) {
149        log_err("u_memset() went too far\n");
150    }
151
152    u_memset(nullTemp, 0, 7);
153    nullTemp[7] = 0xa4;
154    temp[7] = 0;
155    u_memcpy(temp,nullTemp, 7);
156    if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
157        log_err("an error occured in u_memcpy() or u_memcmp()\n");
158
159
160    log_verbose("Testing u_memmove()\n");
161    for (i = 0; i < 7; i++) {
162        temp[i] = (UChar)i;
163    }
164    u_memmove(temp + 1, temp, 7);
165    if(temp[0] != 0) {
166        log_err("an error occured in u_memmove()\n");
167    }
168    for (i = 1; i <= 7; i++) {
169        if(temp[i] != (i - 1)) {
170            log_err("an error occured in u_memmove()\n");
171        }
172    }
173
174    log_verbose("Testing u_strcpy() and u_strcmp()\n");
175
176    for(i=0;i<3;++i)
177    {
178        for(j=0;j<4;++j)
179        {
180            log_verbose("Testing  %s\n", u_austrcpy(tempOut, dataTable[i][j]));
181            temp[0] = 0;
182            u_strcpy(temp,dataTable[i][j]);
183
184            if(u_strcmp(temp,dataTable[i][j])!=0)
185                log_err("something threw an error in u_strcpy() or u_strcmp()\n");
186        }
187    }
188    if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
189        log_err("an error occured in u_memcmp()\n");
190
191    log_verbose("testing u_strcat()\n");
192    i=0;
193    for(j=0; j<2;++j)
194    {
195        u_uastrcpy(temp, "");
196        u_strcpy(temp,dataTable[i][j]);
197        u_strcat(temp,dataTable[i+1][j]);
198        if(u_strcmp(temp,dataTable[i+2][j])!=0)
199            log_err("something threw an error in u_strcat()\n");
200
201    }
202    log_verbose("Testing u_strncmp()\n");
203    for(i=0,j=0;j<4; ++j)
204    {
205        k=u_strlen(dataTable[i][j]);
206        if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
207            log_err("Something threw an error in u_strncmp\n");
208    }
209    if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
210        log_err("an error occured in u_memcmp()\n");
211
212
213    log_verbose("Testing u_strncat\n");
214    for(i=0,j=0;j<4; ++j)
215    {
216        k=u_strlen(dataTable[i][j]);
217
218        u_uastrcpy(temp,"");
219
220        if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
221            log_err("something threw an error in u_strncat or u_uastrcpy()\n");
222
223    }
224
225    log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
226    for(i=2,j=0;j<4; ++j)
227    {
228        k=u_strlen(dataTable[i][j]);
229        u_strncpy(temp, dataTable[i][j],k);
230        temp[k] = 0xa4;
231
232        if(u_strncmp(temp, dataTable[i][j],k)!=0)
233            log_err("something threw an error in u_strncpy()\n");
234
235        if(temp[k] != 0xa4)
236            log_err("something threw an error in u_strncpy()\n");
237
238        u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
239        u_uastrncpy(temp, raw[i][j], k-1);
240        if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
241            log_err("something threw an error in u_uastrncpy(k-1)\n");
242
243        if(temp[k-1] != 0x3F)
244            log_err("something threw an error in u_uastrncpy(k-1)\n");
245
246        u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
247        u_uastrncpy(temp, raw[i][j], k+1);
248        if(u_strcmp(temp, dataTable[i][j])!=0)
249            log_err("something threw an error in u_uastrncpy(k+1)\n");
250
251        if(temp[k] != 0)
252            log_err("something threw an error in u_uastrncpy(k+1)\n");
253
254        u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
255        u_uastrncpy(temp, raw[i][j], k);
256        if(u_strncmp(temp, dataTable[i][j], k)!=0)
257            log_err("something threw an error in u_uastrncpy(k)\n");
258
259        if(temp[k] != 0x3F)
260            log_err("something threw an error in u_uastrncpy(k)\n");
261    }
262
263    log_verbose("Testing u_strchr() and u_memchr()\n");
264
265    for(i=2,j=0;j<4;j++)
266    {
267        UChar saveVal = dataTable[i][j][0];
268        UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
269        int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
270
271        log_verbose("%s ", u_austrcpy(tempOut, findPtr));
272
273        if (findPtr == NULL || *findPtr != 0x005F) {
274            log_err("u_strchr can't find '_' in the string\n");
275        }
276
277        findPtr = u_strchr32(dataTable[i][j], 0x005F);
278        if (findPtr == NULL || *findPtr != 0x005F) {
279            log_err("u_strchr32 can't find '_' in the string\n");
280        }
281
282        findPtr = u_strchr(dataTable[i][j], 0);
283        if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
284            log_err("u_strchr can't find NULL in the string\n");
285        }
286
287        findPtr = u_strchr32(dataTable[i][j], 0);
288        if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
289            log_err("u_strchr32 can't find NULL in the string\n");
290        }
291
292        findPtr = u_memchr(dataTable[i][j], 0, dataSize);
293        if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
294            log_err("u_memchr can't find NULL in the string\n");
295        }
296
297        findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
298        if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
299            log_err("u_memchr32 can't find NULL in the string\n");
300        }
301
302        dataTable[i][j][0] = 0;
303        /* Make sure we skip over the NULL termination */
304        findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
305        if (findPtr == NULL || *findPtr != 0x005F) {
306            log_err("u_memchr can't find '_' in the string\n");
307        }
308
309        findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
310        if (findPtr == NULL || *findPtr != 0x005F) {
311            log_err("u_memchr32 can't find '_' in the string\n");
312        }
313        findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
314        if (findPtr != NULL) {
315            log_err("Should have found NULL when the character is not there.\n");
316        }
317        dataTable[i][j][0] = saveVal;   /* Put it back for the other tests */
318    }
319
320    /*
321     * test that u_strchr32()
322     * does not find surrogate code points when they are part of matched pairs
323     * (= part of supplementary code points)
324     * Jitterbug 1542
325     */
326    {
327        static const UChar s[]={
328            /*   0       1       2       3       4       5       6       7       8  9 */
329            0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
330        };
331
332        if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
333            log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
334        }
335        if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
336            log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
337        }
338    }
339
340    log_verbose("Testing u_austrcpy()");
341    u_austrcpy(test,dataTable[0][0]);
342    if(strcmp(test,raw[0][0])!=0)
343        log_err("There is an error in u_austrcpy()");
344
345
346    log_verbose("Testing u_strtok_r()");
347    {
348        const char tokString[] = "  ,  1 2 3  AHHHHH! 5.5 6 7    ,        8\n";
349        const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
350        UChar delimBuf[sizeof(test)];
351        UChar currTokenBuf[sizeof(tokString)];
352        UChar *state;
353        uint32_t currToken = 0;
354        UChar *ptr;
355
356        u_uastrcpy(temp, tokString);
357        u_uastrcpy(delimBuf, " ");
358
359        ptr = u_strtok_r(temp, delimBuf, &state);
360        u_uastrcpy(delimBuf, " ,");
361        while (ptr != NULL) {
362            u_uastrcpy(currTokenBuf, tokens[currToken]);
363            if (u_strcmp(ptr, currTokenBuf) != 0) {
364                log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
365            }
366            ptr = u_strtok_r(NULL, delimBuf, &state);
367            currToken++;
368        }
369
370        if (currToken != sizeof(tokens)/sizeof(tokens[0])) {
371            log_err("Didn't get correct number of tokens\n");
372        }
373        state = delimBuf;       /* Give it an "invalid" saveState */
374        u_uastrcpy(currTokenBuf, "");
375        if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
376            log_err("Didn't get NULL for empty string\n");
377        }
378        if (state != NULL) {
379            log_err("State should be NULL for empty string\n");
380        }
381        state = delimBuf;       /* Give it an "invalid" saveState */
382        u_uastrcpy(currTokenBuf, ", ,");
383        if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
384            log_err("Didn't get NULL for a string of delimiters\n");
385        }
386        if (state != NULL) {
387            log_err("State should be NULL for a string of delimiters\n");
388        }
389
390        state = delimBuf;       /* Give it an "invalid" saveState */
391        u_uastrcpy(currTokenBuf, "q, ,");
392        if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
393            log_err("Got NULL for a string that does not begin with delimiters\n");
394        }
395        if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
396            log_err("Didn't get NULL for a string that ends in delimiters\n");
397        }
398        if (state != NULL) {
399            log_err("State should be NULL for empty string\n");
400        }
401
402        state = delimBuf;       /* Give it an "invalid" saveState */
403        u_uastrcpy(currTokenBuf, tokString);
404        u_uastrcpy(temp, tokString);
405        u_uastrcpy(delimBuf, "q");  /* Give it a delimiter that it can't find. */
406        ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
407        if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
408            log_err("Should have recieved the same string when there are no delimiters\n");
409        }
410        if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
411            log_err("Should not have found another token in a one token string\n");
412        }
413    }
414
415    /* test u_strcmpCodePointOrder() */
416    {
417        /* these strings are in ascending order */
418        static const UChar strings[][4]={
419            { 0x61, 0 },                    /* U+0061 */
420            { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
421            { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
422            { 0xd800, 0 },                  /* U+d800 */
423            { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
424            { 0xdfff, 0 },                  /* U+dfff */
425            { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
426            { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
427            { 0xd800, 0xdc02, 0 },          /* U+10002 */
428            { 0xd84d, 0xdc56, 0 }           /* U+23456 */
429        };
430
431        UCharIterator iter1, iter2;
432        int32_t len1, len2, r1, r2;
433
434        for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
435            if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
436                log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
437            }
438            if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
439                log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
440            }
441
442            /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
443            if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
444                log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
445            }
446
447            /* test u_strCompare(TRUE) */
448            len1=u_strlen(strings[i]);
449            len2=u_strlen(strings[i+1]);
450            if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 ||
451                u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 ||
452                u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 ||
453                u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0
454            ) {
455                log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
456            }
457
458            /* test u_strCompare(FALSE) */
459            r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
460            r2=u_strcmp(strings[i], strings[i+1]);
461            if(_SIGN(r1)!=_SIGN(r2)) {
462                log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
463            }
464
465            /* test u_strCompareIter() */
466            uiter_setString(&iter1, strings[i], len1);
467            uiter_setString(&iter2, strings[i+1], len2);
468            if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
469                log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
470            }
471            r1=u_strCompareIter(&iter1, &iter2, FALSE);
472            if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
473                log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
474            }
475        }
476    }
477
478    cleanUpDataTable();
479}
480
481static void TestStringSearching()
482{
483    const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
484    const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
485    const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
486    const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
487    const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
488    const UChar surrMatchSet4[] = {0x0000};
489    const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
490    const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
491    const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0};   /* has partial surrogate */
492    const UChar
493        empty[] = { 0 },
494        a[] = { 0x61, 0 },
495        ab[] = { 0x61, 0x62, 0 },
496        ba[] = { 0x62, 0x61, 0 },
497        abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
498        cd[] = { 0x63, 0x64, 0 },
499        dc[] = { 0x64, 0x63, 0 },
500        cdh[] = { 0x63, 0x64, 0x68, 0 },
501        f[] = { 0x66, 0 },
502        fg[] = { 0x66, 0x67, 0 },
503        gf[] = { 0x67, 0x66, 0 };
504
505    log_verbose("Testing u_strpbrk()");
506
507    if (u_strpbrk(testString, a) != &testString[0]) {
508        log_err("u_strpbrk couldn't find first letter a.\n");
509    }
510    if (u_strpbrk(testString, dc) != &testString[2]) {
511        log_err("u_strpbrk couldn't find d or c.\n");
512    }
513    if (u_strpbrk(testString, cd) != &testString[2]) {
514        log_err("u_strpbrk couldn't find c or d.\n");
515    }
516    if (u_strpbrk(testString, cdh) != &testString[2]) {
517        log_err("u_strpbrk couldn't find c, d or h.\n");
518    }
519    if (u_strpbrk(testString, f) != NULL) {
520        log_err("u_strpbrk didn't return NULL for \"f\".\n");
521    }
522    if (u_strpbrk(testString, fg) != NULL) {
523        log_err("u_strpbrk didn't return NULL for \"fg\".\n");
524    }
525    if (u_strpbrk(testString, gf) != NULL) {
526        log_err("u_strpbrk didn't return NULL for \"gf\".\n");
527    }
528    if (u_strpbrk(testString, empty) != NULL) {
529        log_err("u_strpbrk didn't return NULL for \"\".\n");
530    }
531
532    log_verbose("Testing u_strpbrk() with surrogates");
533
534    if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
535        log_err("u_strpbrk couldn't find first letter a.\n");
536    }
537    if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
538        log_err("u_strpbrk couldn't find d or c.\n");
539    }
540    if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
541        log_err("u_strpbrk couldn't find c or d.\n");
542    }
543    if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
544        log_err("u_strpbrk couldn't find c, d or h.\n");
545    }
546    if (u_strpbrk(testSurrogateString, f) != NULL) {
547        log_err("u_strpbrk didn't return NULL for \"f\".\n");
548    }
549    if (u_strpbrk(testSurrogateString, fg) != NULL) {
550        log_err("u_strpbrk didn't return NULL for \"fg\".\n");
551    }
552    if (u_strpbrk(testSurrogateString, gf) != NULL) {
553        log_err("u_strpbrk didn't return NULL for \"gf\".\n");
554    }
555    if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
556        log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
557    }
558    if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
559        log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
560    }
561    if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
562        log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
563    }
564    if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
565        log_err("u_strpbrk should have returned NULL for empty string.\n");
566    }
567    if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
568        log_err("u_strpbrk should have found bad surrogate.\n");
569    }
570
571    log_verbose("Testing u_strcspn()");
572
573    if (u_strcspn(testString, a) != 0) {
574        log_err("u_strcspn couldn't find first letter a.\n");
575    }
576    if (u_strcspn(testString, dc) != 2) {
577        log_err("u_strcspn couldn't find d or c.\n");
578    }
579    if (u_strcspn(testString, cd) != 2) {
580        log_err("u_strcspn couldn't find c or d.\n");
581    }
582    if (u_strcspn(testString, cdh) != 2) {
583        log_err("u_strcspn couldn't find c, d or h.\n");
584    }
585    if (u_strcspn(testString, f) != u_strlen(testString)) {
586        log_err("u_strcspn didn't return NULL for \"f\".\n");
587    }
588    if (u_strcspn(testString, fg) != u_strlen(testString)) {
589        log_err("u_strcspn didn't return NULL for \"fg\".\n");
590    }
591    if (u_strcspn(testString, gf) != u_strlen(testString)) {
592        log_err("u_strcspn didn't return NULL for \"gf\".\n");
593    }
594
595    log_verbose("Testing u_strcspn() with surrogates");
596
597    if (u_strcspn(testSurrogateString, a) != 1) {
598        log_err("u_strcspn couldn't find first letter a.\n");
599    }
600    if (u_strcspn(testSurrogateString, dc) != 5) {
601        log_err("u_strcspn couldn't find d or c.\n");
602    }
603    if (u_strcspn(testSurrogateString, cd) != 5) {
604        log_err("u_strcspn couldn't find c or d.\n");
605    }
606    if (u_strcspn(testSurrogateString, cdh) != 5) {
607        log_err("u_strcspn couldn't find c, d or h.\n");
608    }
609    if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
610        log_err("u_strcspn didn't return NULL for \"f\".\n");
611    }
612    if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
613        log_err("u_strcspn didn't return NULL for \"fg\".\n");
614    }
615    if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
616        log_err("u_strcspn didn't return NULL for \"gf\".\n");
617    }
618    if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
619        log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
620    }
621    if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
622        log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
623    }
624    if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
625        log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
626    }
627    if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
628        log_err("u_strcspn should have returned strlen for empty string.\n");
629    }
630
631
632    log_verbose("Testing u_strspn()");
633
634    if (u_strspn(testString, a) != 1) {
635        log_err("u_strspn couldn't skip first letter a.\n");
636    }
637    if (u_strspn(testString, ab) != 2) {
638        log_err("u_strspn couldn't skip a or b.\n");
639    }
640    if (u_strspn(testString, ba) != 2) {
641        log_err("u_strspn couldn't skip a or b.\n");
642    }
643    if (u_strspn(testString, f) != 0) {
644        log_err("u_strspn didn't return 0 for \"f\".\n");
645    }
646    if (u_strspn(testString, dc) != 0) {
647        log_err("u_strspn couldn't find first letter a (skip d or c).\n");
648    }
649    if (u_strspn(testString, abcd) != u_strlen(testString)) {
650        log_err("u_strspn couldn't skip over the whole string.\n");
651    }
652    if (u_strspn(testString, empty) != 0) {
653        log_err("u_strspn should have returned 0 for empty string.\n");
654    }
655
656    log_verbose("Testing u_strspn() with surrogates");
657    if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
658        log_err("u_strspn couldn't skip 0xdbff or a.\n");
659    }
660    if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
661        log_err("u_strspn couldn't skip 0xdbff or a.\n");
662    }
663    if (u_strspn(testSurrogateString, f) != 0) {
664        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
665    }
666    if (u_strspn(testSurrogateString, dc) != 0) {
667        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
668    }
669    if (u_strspn(testSurrogateString, cd) != 0) {
670        log_err("u_strspn couldn't skip d or c (skip first letter).\n");
671    }
672    if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
673        log_err("u_strspn couldn't skip whole string.\n");
674    }
675    if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
676        log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
677    }
678    if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
679        log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
680    }
681    if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
682        log_err("u_strspn should have returned 0 for empty string.\n");
683    }
684}
685
686/*
687 * All binary Unicode string searches should behave the same for equivalent input.
688 * See Jitterbug 2145.
689 * There are some new functions, too - just test them all.
690 */
691static void
692TestSurrogateSearching() {
693    static const UChar s[]={
694        /* 0       1       2     3       4     5       6     7       8       9    10 11 */
695        0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
696    }, sub_a[]={
697        0x61, 0
698    }, sub_b[]={
699        0x62, 0
700    }, sub_lead[]={
701        0xd801, 0
702    }, sub_trail[]={
703        0xdc02, 0
704    }, sub_supp[]={
705        0xd801, 0xdc02, 0
706    }, sub_supp2[]={
707        0xd801, 0xdc03, 0
708    }, sub_a_lead[]={
709        0x61, 0xd801, 0
710    }, sub_trail_a[]={
711        0xdc02, 0x61, 0
712    }, sub_aba[]={
713        0x61, 0x62, 0x61, 0
714    };
715    static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
716    static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
717
718    const UChar *first, *last;
719
720    /* search for NUL code point: find end of string */
721    first=s+u_strlen(s);
722
723    if(
724        first!=u_strchr(s, nul) ||
725        first!=u_strchr32(s, nul) ||
726        first!=u_memchr(s, nul, LENGTHOF(s)) ||
727        first!=u_memchr32(s, nul, LENGTHOF(s)) ||
728        first!=u_strrchr(s, nul) ||
729        first!=u_strrchr32(s, nul) ||
730        first!=u_memrchr(s, nul, LENGTHOF(s)) ||
731        first!=u_memrchr32(s, nul, LENGTHOF(s))
732    ) {
733        log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
734    }
735
736    /* search for empty substring: find beginning of string */
737    if(
738        s!=u_strstr(s, &nul) ||
739        s!=u_strFindFirst(s, -1, &nul, -1) ||
740        s!=u_strFindFirst(s, -1, &nul, 0) ||
741        s!=u_strFindFirst(s, LENGTHOF(s), &nul, -1) ||
742        s!=u_strFindFirst(s, LENGTHOF(s), &nul, 0) ||
743        s!=u_strrstr(s, &nul) ||
744        s!=u_strFindLast(s, -1, &nul, -1) ||
745        s!=u_strFindLast(s, -1, &nul, 0) ||
746        s!=u_strFindLast(s, LENGTHOF(s), &nul, -1) ||
747        s!=u_strFindLast(s, LENGTHOF(s), &nul, 0)
748    ) {
749        log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
750    }
751
752    /* find 'a' in s[1..10[ */
753    first=s+3;
754    last=s+7;
755    if(
756        first!=u_strchr(s+1, a) ||
757        first!=u_strchr32(s+1, a) ||
758        first!=u_memchr(s+1, a, 9) ||
759        first!=u_memchr32(s+1, a, 9) ||
760        first!=u_strstr(s+1, sub_a) ||
761        first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
762        first!=u_strFindFirst(s+1, -1, &a, 1) ||
763        first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
764        first!=u_strFindFirst(s+1, 9, &a, 1) ||
765        (s+10)!=u_strrchr(s+1, a) ||
766        (s+10)!=u_strrchr32(s+1, a) ||
767        last!=u_memrchr(s+1, a, 9) ||
768        last!=u_memrchr32(s+1, a, 9) ||
769        (s+10)!=u_strrstr(s+1, sub_a) ||
770        (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
771        (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
772        last!=u_strFindLast(s+1, 9, sub_a, -1) ||
773        last!=u_strFindLast(s+1, 9, &a, 1)
774    ) {
775        log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
776    }
777
778    /* do not find 'b' in s[1..10[ */
779    if(
780        NULL!=u_strchr(s+1, b) ||
781        NULL!=u_strchr32(s+1, b) ||
782        NULL!=u_memchr(s+1, b, 9) ||
783        NULL!=u_memchr32(s+1, b, 9) ||
784        NULL!=u_strstr(s+1, sub_b) ||
785        NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
786        NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
787        NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
788        NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
789        NULL!=u_strrchr(s+1, b) ||
790        NULL!=u_strrchr32(s+1, b) ||
791        NULL!=u_memrchr(s+1, b, 9) ||
792        NULL!=u_memrchr32(s+1, b, 9) ||
793        NULL!=u_strrstr(s+1, sub_b) ||
794        NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
795        NULL!=u_strFindLast(s+1, -1, &b, 1) ||
796        NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
797        NULL!=u_strFindLast(s+1, 9, &b, 1)
798    ) {
799        log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
800    }
801
802    /* do not find a non-code point in s[1..10[ */
803    if(
804        NULL!=u_strchr32(s+1, ill) ||
805        NULL!=u_memchr32(s+1, ill, 9) ||
806        NULL!=u_strrchr32(s+1, ill) ||
807        NULL!=u_memrchr32(s+1, ill, 9)
808    ) {
809        log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
810    }
811
812    /* find U+d801 in s[1..10[ */
813    first=s+6;
814    if(
815        first!=u_strchr(s+1, lead) ||
816        first!=u_strchr32(s+1, lead) ||
817        first!=u_memchr(s+1, lead, 9) ||
818        first!=u_memchr32(s+1, lead, 9) ||
819        first!=u_strstr(s+1, sub_lead) ||
820        first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
821        first!=u_strFindFirst(s+1, -1, &lead, 1) ||
822        first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
823        first!=u_strFindFirst(s+1, 9, &lead, 1) ||
824        first!=u_strrchr(s+1, lead) ||
825        first!=u_strrchr32(s+1, lead) ||
826        first!=u_memrchr(s+1, lead, 9) ||
827        first!=u_memrchr32(s+1, lead, 9) ||
828        first!=u_strrstr(s+1, sub_lead) ||
829        first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
830        first!=u_strFindLast(s+1, -1, &lead, 1) ||
831        first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
832        first!=u_strFindLast(s+1, 9, &lead, 1)
833    ) {
834        log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
835    }
836
837    /* find U+dc02 in s[1..10[ */
838    first=s+4;
839    if(
840        first!=u_strchr(s+1, trail) ||
841        first!=u_strchr32(s+1, trail) ||
842        first!=u_memchr(s+1, trail, 9) ||
843        first!=u_memchr32(s+1, trail, 9) ||
844        first!=u_strstr(s+1, sub_trail) ||
845        first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
846        first!=u_strFindFirst(s+1, -1, &trail, 1) ||
847        first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
848        first!=u_strFindFirst(s+1, 9, &trail, 1) ||
849        first!=u_strrchr(s+1, trail) ||
850        first!=u_strrchr32(s+1, trail) ||
851        first!=u_memrchr(s+1, trail, 9) ||
852        first!=u_memrchr32(s+1, trail, 9) ||
853        first!=u_strrstr(s+1, sub_trail) ||
854        first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
855        first!=u_strFindLast(s+1, -1, &trail, 1) ||
856        first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
857        first!=u_strFindLast(s+1, 9, &trail, 1)
858    ) {
859        log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
860    }
861
862    /* find U+10402 in s[1..10[ */
863    first=s+1;
864    last=s+8;
865    if(
866        first!=u_strchr32(s+1, supp) ||
867        first!=u_memchr32(s+1, supp, 9) ||
868        first!=u_strstr(s+1, sub_supp) ||
869        first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
870        first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
871        first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
872        first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
873        last!=u_strrchr32(s+1, supp) ||
874        last!=u_memrchr32(s+1, supp, 9) ||
875        last!=u_strrstr(s+1, sub_supp) ||
876        last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
877        last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
878        last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
879        last!=u_strFindLast(s+1, 9, sub_supp, 2)
880    ) {
881        log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
882    }
883
884    /* do not find U+10402 in a single UChar */
885    if(
886        NULL!=u_memchr32(s+1, supp, 1) ||
887        NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
888        NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
889        NULL!=u_memrchr32(s+1, supp, 1) ||
890        NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
891        NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
892        NULL!=u_memrchr32(s+2, supp, 1) ||
893        NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
894        NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
895    ) {
896        log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
897    }
898
899    /* do not find U+10403 in s[1..10[ */
900    if(
901        NULL!=u_strchr32(s+1, supp2) ||
902        NULL!=u_memchr32(s+1, supp2, 9) ||
903        NULL!=u_strstr(s+1, sub_supp2) ||
904        NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
905        NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
906        NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
907        NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
908        NULL!=u_strrchr32(s+1, supp2) ||
909        NULL!=u_memrchr32(s+1, supp2, 9) ||
910        NULL!=u_strrstr(s+1, sub_supp2) ||
911        NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
912        NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
913        NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
914        NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
915    ) {
916        log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
917    }
918
919    /* find <0061 d801> in s[1..10[ */
920    first=s+5;
921    if(
922        first!=u_strstr(s+1, sub_a_lead) ||
923        first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
924        first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
925        first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
926        first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
927        first!=u_strrstr(s+1, sub_a_lead) ||
928        first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
929        first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
930        first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
931        first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
932    ) {
933        log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
934    }
935
936    /* find <dc02 0061> in s[1..10[ */
937    first=s+4;
938    if(
939        first!=u_strstr(s+1, sub_trail_a) ||
940        first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
941        first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
942        first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
943        first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
944        first!=u_strrstr(s+1, sub_trail_a) ||
945        first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
946        first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
947        first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
948        first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
949    ) {
950        log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
951    }
952
953    /* do not find "aba" in s[1..10[ */
954    if(
955        NULL!=u_strstr(s+1, sub_aba) ||
956        NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
957        NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
958        NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
959        NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
960        NULL!=u_strrstr(s+1, sub_aba) ||
961        NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
962        NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
963        NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
964        NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
965    ) {
966        log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
967    }
968}
969
970static void TestStringCopy()
971{
972    UChar temp[40];
973    UChar *result=0;
974    UChar subString[5];
975    UChar uchars[]={0x61, 0x62, 0x63, 0x00};
976    char  charOut[40];
977    char  chars[]="abc";    /* needs default codepage */
978
979    log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
980
981    u_uastrcpy(temp, "abc");
982    if(u_strcmp(temp, uchars) != 0) {
983        log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
984    }
985
986    temp[0] = 0xFB; /* load garbage into it */
987    temp[1] = 0xFB;
988    temp[2] = 0xFB;
989    temp[3] = 0xFB;
990
991    u_uastrncpy(temp, "abcabcabc", 3);
992    if(u_strncmp(uchars, temp, 3) != 0){
993        log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
994    }
995    if(temp[3] != 0xFB) {
996        log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
997    }
998
999    charOut[0] = (char)0x7B; /* load garbage into it */
1000    charOut[1] = (char)0x7B;
1001    charOut[2] = (char)0x7B;
1002    charOut[3] = (char)0x7B;
1003
1004    temp[0] = 0x0061;
1005    temp[1] = 0x0062;
1006    temp[2] = 0x0063;
1007    temp[3] = 0x0061;
1008    temp[4] = 0x0062;
1009    temp[5] = 0x0063;
1010    temp[6] = 0x0000;
1011
1012    u_austrncpy(charOut, temp, 3);
1013    if(strncmp(chars, charOut, 3) != 0){
1014        log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1015    }
1016    if(charOut[3] != (char)0x7B) {
1017        log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1018    }
1019
1020    /*Testing u_strchr()*/
1021    log_verbose("Testing u_strchr\n");
1022    temp[0]=0x42;
1023    temp[1]=0x62;
1024    temp[2]=0x62;
1025    temp[3]=0x63;
1026    temp[4]=0xd841;
1027    temp[5]=0xd841;
1028    temp[6]=0xdc02;
1029    temp[7]=0;
1030    result=u_strchr(temp, (UChar)0x62);
1031    if(result != temp+1){
1032        log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1033    }
1034    /*Testing u_strstr()*/
1035    log_verbose("Testing u_strstr\n");
1036    subString[0]=0x62;
1037    subString[1]=0x63;
1038    subString[2]=0;
1039    result=u_strstr(temp, subString);
1040    if(result != temp+2){
1041        log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1042    }
1043    result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1044    if(result != temp){
1045        log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1046    }
1047    result=u_strstr(subString, temp);
1048    if(result != NULL){
1049        log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1050    }
1051
1052    /*Testing u_strchr32*/
1053    log_verbose("Testing u_strchr32\n");
1054    result=u_strchr32(temp, (UChar32)0x62);
1055    if(result != temp+1){
1056        log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1057    }
1058    result=u_strchr32(temp, (UChar32)0xfb);
1059    if(result != NULL){
1060        log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1061    }
1062    result=u_strchr32(temp, (UChar32)0x20402);
1063    if(result != temp+5){
1064        log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1065    }
1066
1067    temp[7]=0xfc00;
1068    result=u_memchr32(temp, (UChar32)0x20402, 7);
1069    if(result != temp+5){
1070        log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1071    }
1072    result=u_memchr32(temp, (UChar32)0x20402, 6);
1073    if(result != NULL){
1074        log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1075    }
1076    result=u_memchr32(temp, (UChar32)0x20402, 1);
1077    if(result != NULL){
1078        log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1079    }
1080    result=u_memchr32(temp, (UChar32)0xfc00, 8);
1081    if(result != temp+7){
1082        log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1083    }
1084}
1085
1086/* test u_unescape() and u_unescapeAt() ------------------------------------- */
1087
1088static void
1089TestUnescape() {
1090    static UChar buffer[200];
1091
1092    static const char* input =
1093        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1094
1095    static const UChar expect[]={
1096        0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1097        0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1098        0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1099        0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1100    };
1101    static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1;
1102    int32_t length;
1103
1104    /* test u_unescape() */
1105    length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0]));
1106    if(length!=explength || u_strcmp(buffer, expect)!=0) {
1107        log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1108                explength);
1109    }
1110
1111    /* try preflighting */
1112    length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0]));
1113    if(length!=explength || u_strcmp(buffer, expect)!=0) {
1114        log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1115    }
1116
1117    /* ### TODO: test u_unescapeAt() */
1118}
1119
1120/* test code point counting functions --------------------------------------- */
1121
1122/* reference implementation of u_strHasMoreChar32Than() */
1123static int32_t
1124_refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1125    int32_t count=u_countChar32(s, length);
1126    return count>number;
1127}
1128
1129/* compare the real function against the reference */
1130static void
1131_testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1132    if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1133        log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1134                i, length, number, u_strHasMoreChar32Than(s, length, number));
1135    }
1136}
1137
1138static void
1139TestCountChar32() {
1140    static const UChar string[]={
1141        0x61, 0x62, 0xd800, 0xdc00,
1142        0xd801, 0xdc01, 0x63, 0xd802,
1143        0x64, 0xdc03, 0x65, 0x66,
1144        0xd804, 0xdc04, 0xd805, 0xdc05,
1145        0x67
1146    };
1147    UChar buffer[100];
1148    int32_t i, length, number;
1149
1150    /* test u_strHasMoreChar32Than() with length>=0 */
1151    length=LENGTHOF(string);
1152    while(length>=0) {
1153        for(i=0; i<=length; ++i) {
1154            for(number=-1; number<=((length-i)+2); ++number) {
1155                _testStrHasMoreChar32Than(string+i, i, length-i, number);
1156            }
1157        }
1158        --length;
1159    }
1160
1161    /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1162    length=LENGTHOF(string);
1163    u_memcpy(buffer, string, length);
1164    while(length>=0) {
1165        buffer[length]=0;
1166        for(i=0; i<=length; ++i) {
1167            for(number=-1; number<=((length-i)+2); ++number) {
1168                _testStrHasMoreChar32Than(string+i, i, -1, number);
1169            }
1170        }
1171        --length;
1172    }
1173
1174    /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1175    for(length=-1; length<=1; ++length) {
1176        for(i=0; i<=length; ++i) {
1177            for(number=-2; number<=2; ++number) {
1178                _testStrHasMoreChar32Than(NULL, 0, length, number);
1179            }
1180        }
1181    }
1182}
1183
1184/* UCharIterator ------------------------------------------------------------ */
1185
1186/*
1187 * Compare results from two iterators, should be same.
1188 * Assume that the text is not empty and that
1189 * iteration start==0 and iteration limit==length.
1190 */
1191static void
1192compareIterators(UCharIterator *iter1, const char *n1,
1193                 UCharIterator *iter2, const char *n2) {
1194    int32_t i, pos1, pos2, middle, length;
1195    UChar32 c1, c2;
1196
1197    /* compare lengths */
1198    length=iter1->getIndex(iter1, UITER_LENGTH);
1199    pos2=iter2->getIndex(iter2, UITER_LENGTH);
1200    if(length!=pos2) {
1201        log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1202        return;
1203    }
1204
1205    /* set into the middle */
1206    middle=length/2;
1207
1208    pos1=iter1->move(iter1, middle, UITER_ZERO);
1209    if(pos1!=middle) {
1210        log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1211        return;
1212    }
1213
1214    pos2=iter2->move(iter2, middle, UITER_ZERO);
1215    if(pos2!=middle) {
1216        log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1217        return;
1218    }
1219
1220    /* test current() */
1221    c1=iter1->current(iter1);
1222    c2=iter2->current(iter2);
1223    if(c1!=c2) {
1224        log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1225        return;
1226    }
1227
1228    /* move forward 3 UChars */
1229    for(i=0; i<3; ++i) {
1230        c1=iter1->next(iter1);
1231        c2=iter2->next(iter2);
1232        if(c1!=c2) {
1233            log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1234            return;
1235        }
1236    }
1237
1238    /* move backward 5 UChars */
1239    for(i=0; i<5; ++i) {
1240        c1=iter1->previous(iter1);
1241        c2=iter2->previous(iter2);
1242        if(c1!=c2) {
1243            log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1244            return;
1245        }
1246    }
1247
1248    /* iterate forward from the beginning */
1249    pos1=iter1->move(iter1, 0, UITER_START);
1250    if(pos1<0) {
1251        log_err("%s->move(start) failed\n", n1);
1252        return;
1253    }
1254    if(!iter1->hasNext(iter1)) {
1255        log_err("%s->hasNext() at the start returns FALSE\n", n1);
1256        return;
1257    }
1258
1259    pos2=iter2->move(iter2, 0, UITER_START);
1260    if(pos2<0) {
1261        log_err("%s->move(start) failed\n", n2);
1262        return;
1263    }
1264    if(!iter2->hasNext(iter2)) {
1265        log_err("%s->hasNext() at the start returns FALSE\n", n2);
1266        return;
1267    }
1268
1269    do {
1270        c1=iter1->next(iter1);
1271        c2=iter2->next(iter2);
1272        if(c1!=c2) {
1273            log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1274            return;
1275        }
1276    } while(c1>=0);
1277
1278    if(iter1->hasNext(iter1)) {
1279        log_err("%s->hasNext() at the end returns TRUE\n", n1);
1280        return;
1281    }
1282    if(iter2->hasNext(iter2)) {
1283        log_err("%s->hasNext() at the end returns TRUE\n", n2);
1284        return;
1285    }
1286
1287    /* back to the middle */
1288    pos1=iter1->move(iter1, middle, UITER_ZERO);
1289    if(pos1!=middle) {
1290        log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1291        return;
1292    }
1293
1294    pos2=iter2->move(iter2, middle, UITER_ZERO);
1295    if(pos2!=middle) {
1296        log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1297        return;
1298    }
1299
1300    /* move to index 1 */
1301    pos1=iter1->move(iter1, 1, UITER_ZERO);
1302    if(pos1!=1) {
1303        log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1304        return;
1305    }
1306
1307    pos2=iter2->move(iter2, 1, UITER_ZERO);
1308    if(pos2!=1) {
1309        log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1310        return;
1311    }
1312
1313    /* iterate backward from the end */
1314    pos1=iter1->move(iter1, 0, UITER_LIMIT);
1315    if(pos1<0) {
1316        log_err("%s->move(limit) failed\n", n1);
1317        return;
1318    }
1319    if(!iter1->hasPrevious(iter1)) {
1320        log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1321        return;
1322    }
1323
1324    pos2=iter2->move(iter2, 0, UITER_LIMIT);
1325    if(pos2<0) {
1326        log_err("%s->move(limit) failed\n", n2);
1327        return;
1328    }
1329    if(!iter2->hasPrevious(iter2)) {
1330        log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1331        return;
1332    }
1333
1334    do {
1335        c1=iter1->previous(iter1);
1336        c2=iter2->previous(iter2);
1337        if(c1!=c2) {
1338            log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1339            return;
1340        }
1341    } while(c1>=0);
1342
1343    if(iter1->hasPrevious(iter1)) {
1344        log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1345        return;
1346    }
1347    if(iter2->hasPrevious(iter2)) {
1348        log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1349        return;
1350    }
1351}
1352
1353/*
1354 * Test the iterator's getState() and setState() functions.
1355 * iter1 and iter2 must be set up for the same iterator type and the same string
1356 * but may be physically different structs (different addresses).
1357 *
1358 * Assume that the text is not empty and that
1359 * iteration start==0 and iteration limit==length.
1360 * It must be 2<=middle<=length-2.
1361 */
1362static void
1363testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1364    UChar32 u[4];
1365
1366    UErrorCode errorCode;
1367    UChar32 c;
1368    uint32_t state;
1369    int32_t i, j;
1370
1371    /* get four UChars from the middle of the string */
1372    iter1->move(iter1, middle-2, UITER_ZERO);
1373    for(i=0; i<4; ++i) {
1374        c=iter1->next(iter1);
1375        if(c<0) {
1376            /* the test violates the assumptions, see comment above */
1377            log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1378            return;
1379        }
1380        u[i]=c;
1381    }
1382
1383    /* move to the middle and get the state */
1384    iter1->move(iter1, -2, UITER_CURRENT);
1385    state=uiter_getState(iter1);
1386
1387    /* set the state into the second iterator and compare the results */
1388    errorCode=U_ZERO_ERROR;
1389    uiter_setState(iter2, state, &errorCode);
1390    if(U_FAILURE(errorCode)) {
1391        log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1392        return;
1393    }
1394
1395    c=iter2->current(iter2);
1396    if(c!=u[2]) {
1397        log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1398    }
1399
1400    c=iter2->previous(iter2);
1401    if(c!=u[1]) {
1402        log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1403    }
1404
1405    iter2->move(iter2, 2, UITER_CURRENT);
1406    c=iter2->next(iter2);
1407    if(c!=u[3]) {
1408        log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1409    }
1410
1411    iter2->move(iter2, -3, UITER_CURRENT);
1412    c=iter2->previous(iter2);
1413    if(c!=u[0]) {
1414        log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1415    }
1416
1417    /* move the second iterator back to the middle */
1418    iter2->move(iter2, 1, UITER_CURRENT);
1419    iter2->next(iter2);
1420
1421    /* check that both are in the middle */
1422    i=iter1->getIndex(iter1, UITER_CURRENT);
1423    j=iter2->getIndex(iter2, UITER_CURRENT);
1424    if(i!=middle) {
1425        log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1426    }
1427    if(i!=j) {
1428        log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1429    }
1430
1431    /* compare lengths */
1432    i=iter1->getIndex(iter1, UITER_LENGTH);
1433    j=iter2->getIndex(iter2, UITER_LENGTH);
1434    if(i!=j) {
1435        log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1436    }
1437}
1438
1439static void
1440TestUCharIterator() {
1441    static const UChar text[]={
1442        0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1443    };
1444    char bytes[40];
1445
1446    UCharIterator iter, iter1, iter2;
1447    UConverter *cnv;
1448    UErrorCode errorCode;
1449    int32_t length;
1450
1451    /* simple API/code coverage - test NOOP UCharIterator */
1452    uiter_setString(&iter, NULL, 0);
1453    if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1454        iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1455        iter.hasNext(&iter) || iter.hasPrevious(&iter)
1456    ) {
1457        log_err("NOOP UCharIterator behaves unexpectedly\n");
1458    }
1459
1460    /* test get/set state */
1461    length=LENGTHOF(text)-1;
1462    uiter_setString(&iter1, text, -1);
1463    uiter_setString(&iter2, text, length);
1464    testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1465    testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1466
1467    /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1468    errorCode=U_ZERO_ERROR;
1469    u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1470    if(U_FAILURE(errorCode)) {
1471        log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1472        return;
1473    }
1474
1475    uiter_setString(&iter1, text, -1);
1476    uiter_setUTF8(&iter2, bytes, length);
1477    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1478
1479    /* try again with length=-1 */
1480    uiter_setUTF8(&iter2, bytes, -1);
1481    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1482
1483    /* test get/set state */
1484    length=LENGTHOF(text)-1;
1485    uiter_setUTF8(&iter1, bytes, -1);
1486    testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1487    testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1488
1489    /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1490    errorCode=U_ZERO_ERROR;
1491    cnv=ucnv_open("UTF-16BE", &errorCode);
1492    length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1493    ucnv_close(cnv);
1494    if(U_FAILURE(errorCode)) {
1495        log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1496        return;
1497    }
1498
1499    /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1500    bytes[length]=bytes[length+1]=0;
1501
1502    uiter_setString(&iter1, text, -1);
1503    uiter_setUTF16BE(&iter2, bytes, length);
1504    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1505
1506    /* try again with length=-1 */
1507    uiter_setUTF16BE(&iter2, bytes, -1);
1508    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1509
1510    /* try again after moving the bytes up one, and with length=-1 */
1511    memmove(bytes+1, bytes, length+2);
1512    uiter_setUTF16BE(&iter2, bytes+1, -1);
1513    compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1514
1515    /* ### TODO test other iterators: CharacterIterator, Replaceable */
1516}
1517
1518#if UCONFIG_NO_COLLATION
1519
/*
 * Stub used when UCONFIG_NO_COLLATION is set: the test is still registered
 * in addUStringTest(), so the function must exist, but it checks nothing.
 */
static void
TestUNormIterator(void) {
    /* test nothing */
}

/*
 * Stub used when UCONFIG_NO_COLLATION is set; matches the forward
 * declaration at the top of the file and checks nothing.
 */
static void
TestBadUNormIterator(void) {
    /* test nothing, as well */
}
1529
1530#else
1531
1532#include "unicode/unorm.h"
1533#include "unorm_it.h"
1534
1535/*
1536 * Compare results from two iterators, should be same.
1537 * Assume that the text is not empty and that
1538 * iteration start==0 and iteration limit==length.
1539 *
1540 * Modified version of compareIterators() but does not assume that indexes
1541 * are available.
1542 */
1543static void
1544compareIterNoIndexes(UCharIterator *iter1, const char *n1,
1545                     UCharIterator *iter2, const char *n2,
1546                     int32_t middle) {
1547    uint32_t state;
1548    int32_t i;
1549    UChar32 c1, c2;
1550    UErrorCode errorCode;
1551
1552    /* code coverage for unorm_it.c/unormIteratorGetIndex() */
1553    if(
1554        iter2->getIndex(iter2, UITER_START)!=0 ||
1555        iter2->getIndex(iter2, UITER_LENGTH)!=UITER_UNKNOWN_INDEX
1556    ) {
1557        log_err("UNormIterator.getIndex() failed\n");
1558    }
1559
1560    /* set into the middle */
1561    iter1->move(iter1, middle, UITER_ZERO);
1562    iter2->move(iter2, middle, UITER_ZERO);
1563
1564    /* test current() */
1565    c1=iter1->current(iter1);
1566    c2=iter2->current(iter2);
1567    if(c1!=c2) {
1568        log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1569        return;
1570    }
1571
1572    /* move forward 3 UChars */
1573    for(i=0; i<3; ++i) {
1574        c1=iter1->next(iter1);
1575        c2=iter2->next(iter2);
1576        if(c1!=c2) {
1577            log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1578            return;
1579        }
1580    }
1581
1582    /* move backward 5 UChars */
1583    for(i=0; i<5; ++i) {
1584        c1=iter1->previous(iter1);
1585        c2=iter2->previous(iter2);
1586        if(c1!=c2) {
1587            log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1588            return;
1589        }
1590    }
1591
1592    /* iterate forward from the beginning */
1593    iter1->move(iter1, 0, UITER_START);
1594    if(!iter1->hasNext(iter1)) {
1595        log_err("%s->hasNext() at the start returns FALSE\n", n1);
1596        return;
1597    }
1598
1599    iter2->move(iter2, 0, UITER_START);
1600    if(!iter2->hasNext(iter2)) {
1601        log_err("%s->hasNext() at the start returns FALSE\n", n2);
1602        return;
1603    }
1604
1605    do {
1606        c1=iter1->next(iter1);
1607        c2=iter2->next(iter2);
1608        if(c1!=c2) {
1609            log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1610            return;
1611        }
1612    } while(c1>=0);
1613
1614    if(iter1->hasNext(iter1)) {
1615        log_err("%s->hasNext() at the end returns TRUE\n", n1);
1616        return;
1617    }
1618    if(iter2->hasNext(iter2)) {
1619        log_err("%s->hasNext() at the end returns TRUE\n", n2);
1620        return;
1621    }
1622
1623    /* iterate backward */
1624    do {
1625        c1=iter1->previous(iter1);
1626        c2=iter2->previous(iter2);
1627        if(c1!=c2) {
1628            log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1629            return;
1630        }
1631    } while(c1>=0);
1632
1633    /* back to the middle */
1634    iter1->move(iter1, middle, UITER_ZERO);
1635    iter2->move(iter2, middle, UITER_ZERO);
1636
1637    /* try get/set state */
1638    while((state=uiter_getState(iter2))==UITER_NO_STATE) {
1639        if(!iter2->hasNext(iter2)) {
1640            log_err("%s has no known state from middle=%d to the end\n", n2, middle);
1641            return;
1642        }
1643        iter2->next(iter2);
1644    }
1645
1646    errorCode=U_ZERO_ERROR;
1647
1648    c2=iter2->current(iter2);
1649    iter2->move(iter2, 0, UITER_ZERO);
1650    uiter_setState(iter2, state, &errorCode);
1651    c1=iter2->current(iter2);
1652    if(U_FAILURE(errorCode) || c1!=c2) {
1653        log_err("%s->current() differs across get/set state, U+%04x vs. U+%04x\n", n2, c2, c1);
1654        return;
1655    }
1656
1657    c2=iter2->previous(iter2);
1658    iter2->move(iter2, 0, UITER_ZERO);
1659    uiter_setState(iter2, state, &errorCode);
1660    c1=iter2->previous(iter2);
1661    if(U_FAILURE(errorCode) || c1!=c2) {
1662        log_err("%s->previous() differs across get/set state, U+%04x vs. U+%04x\n", n2, c2, c1);
1663        return;
1664    }
1665
1666    /* iterate backward from the end */
1667    iter1->move(iter1, 0, UITER_LIMIT);
1668    if(!iter1->hasPrevious(iter1)) {
1669        log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1670        return;
1671    }
1672
1673    iter2->move(iter2, 0, UITER_LIMIT);
1674    if(!iter2->hasPrevious(iter2)) {
1675        log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1676        return;
1677    }
1678
1679    do {
1680        c1=iter1->previous(iter1);
1681        c2=iter2->previous(iter2);
1682        if(c1!=c2) {
1683            log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1684            return;
1685        }
1686    } while(c1>=0);
1687
1688    if(iter1->hasPrevious(iter1)) {
1689        log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1690        return;
1691    }
1692    if(iter2->hasPrevious(iter2)) {
1693        log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1694        return;
1695    }
1696}
1697
1698/* n2 must have a digit 1 at the end, will be incremented with the normalization mode */
1699static void
1700testUNormIteratorWithText(const UChar *text, int32_t textLength, int32_t middle,
1701                          const char *name1, const char *n2) {
1702    UChar buffer[600];
1703    char name2[40];
1704
1705    UCharIterator iter1, iter2, *iter;
1706    UNormIterator *uni;
1707
1708    UNormalizationMode mode;
1709    UErrorCode errorCode;
1710    int32_t length;
1711
1712    /* open a normalizing iterator */
1713    errorCode=U_ZERO_ERROR;
1714    uni=unorm_openIter(NULL, 0, &errorCode);
1715    if(U_FAILURE(errorCode)) {
1716        log_err("unorm_openIter() fails: %s\n", u_errorName(errorCode));
1717        return;
1718    }
1719
1720    /* set iterator 2 to the original text */
1721    uiter_setString(&iter2, text, textLength);
1722
1723    strcpy(name2, n2);
1724
1725    /* test the normalizing iterator for each mode */
1726    for(mode=UNORM_NONE; mode<UNORM_MODE_COUNT; ++mode) {
1727        length=unorm_normalize(text, textLength, mode, 0, buffer, LENGTHOF(buffer), &errorCode);
1728        if(U_FAILURE(errorCode)) {
1729            log_data_err("unorm_normalize(mode %d) failed: %s - (Are you missing data?)\n", mode, u_errorName(errorCode));
1730            break;
1731        }
1732
1733        /* set iterator 1 to the normalized text  */
1734        uiter_setString(&iter1, buffer, length);
1735
1736        /* set the normalizing iterator to use iter2 */
1737        iter=unorm_setIter(uni, &iter2, mode, &errorCode);
1738        if(U_FAILURE(errorCode)) {
1739            log_err("unorm_setIter(mode %d) failed: %s\n", mode, u_errorName(errorCode));
1740            break;
1741        }
1742
1743        compareIterNoIndexes(&iter1, name1, iter, name2, middle);
1744        ++name2[strlen(name2)-1];
1745    }
1746
1747    unorm_closeIter(uni);
1748}
1749
1750static void
1751TestUNormIterator() {
1752    static const UChar text[]={ /* must contain <00C5 0327> see u_strchr() below */
1753        0x61,                                                   /* 'a' */
1754        0xe4, 0x61, 0x308,                                      /* variations of 'a'+umlaut */
1755        0xc5, 0x327, 0x41, 0x30a, 0x327, 0x41, 0x327, 0x30a,    /* variations of 'A'+ring+cedilla */
1756        0xfb03, 0xfb00, 0x69, 0x66, 0x66, 0x69, 0x66, 0xfb01    /* variations of 'ffi' */
1757    };
1758    static const UChar surrogateText[]={
1759        0x6e, 0xd900, 0x6a, 0xdc00, 0xd900, 0xdc00, 0x61
1760    };
1761
1762    UChar longText[600];
1763    int32_t i, middle, length;
1764
1765    length=LENGTHOF(text);
1766    testUNormIteratorWithText(text, length, length/2, "UCharIter", "UNormIter1");
1767    testUNormIteratorWithText(text, length, length, "UCharIterEnd", "UNormIterEnd1");
1768
1769    /* test again, this time with an insane string to cause internal buffer overflows */
1770    middle=(int32_t)(u_strchr(text, 0x327)-text); /* see comment at text[] */
1771    memcpy(longText, text, middle*U_SIZEOF_UCHAR);
1772    for(i=0; i<150; ++i) {
1773        longText[middle+i]=0x30a; /* insert many rings between 'A-ring' and cedilla */
1774    }
1775    memcpy(longText+middle+i, text+middle, (LENGTHOF(text)-middle)*U_SIZEOF_UCHAR);
1776    length=LENGTHOF(text)+i;
1777
1778    /* append another copy of this string for more overflows */
1779    memcpy(longText+length, longText, length*U_SIZEOF_UCHAR);
1780    length*=2;
1781
1782    /* the first test of the following two starts at length/4, inside the sea of combining rings */
1783    testUNormIteratorWithText(longText, length, length/4, "UCharIterLong", "UNormIterLong1");
1784    testUNormIteratorWithText(longText, length, length, "UCharIterLongEnd", "UNormIterLongEnd1");
1785
1786    length=LENGTHOF(surrogateText);
1787    testUNormIteratorWithText(surrogateText, length, length/4, "UCharIterSurr", "UNormIterSurr1");
1788    testUNormIteratorWithText(surrogateText, length, length, "UCharIterSurrEnd", "UNormIterSurrEnd1");
1789}
1790
1791static void
1792TestBadUNormIterator(void) {
1793#if !UCONFIG_NO_NORMALIZATION
1794    UErrorCode status = U_ILLEGAL_ESCAPE_SEQUENCE;
1795    UNormIterator *uni;
1796
1797    unorm_setIter(NULL, NULL, UNORM_NONE, &status);
1798    if (status != U_ILLEGAL_ESCAPE_SEQUENCE) {
1799        log_err("unorm_setIter changed the error code to: %s\n", u_errorName(status));
1800    }
1801    status = U_ZERO_ERROR;
1802    unorm_setIter(NULL, NULL, UNORM_NONE, &status);
1803    if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1804        log_err("unorm_setIter didn't react correctly to bad arguments: %s\n", u_errorName(status));
1805    }
1806    status = U_ZERO_ERROR;
1807    uni=unorm_openIter(NULL, 0, &status);
1808    if(U_FAILURE(status)) {
1809        log_err("unorm_openIter() fails: %s\n", u_errorName(status));
1810        return;
1811    }
1812    unorm_setIter(uni, NULL, UNORM_NONE, &status);
1813    unorm_closeIter(uni);
1814#endif
1815}
1816
1817#endif
1818