1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8/********************************************************************************
9*
10* File reapits.c
11*
12*********************************************************************************/
13/*C API TEST FOR Regular Expressions */
14/**
15*   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
16*   try to test the full functionality.  It just calls each function and verifies that it
17*   works on a basic level.
18*
19*   More complete testing of regular expression functionality is done with the C++ tests.
20**/
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26#include <stdlib.h>
27#include <string.h>
28#include "unicode/uloc.h"
29#include "unicode/uregex.h"
30#include "unicode/ustring.h"
31#include "unicode/utext.h"
32#include "unicode/utf8.h"
33#include "cintltst.h"
34#include "cmemory.h"
35
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Logs a data error, tagged with the invoking file and line and the name
 *     of the error code, when `status` is a failure code.  Nothing is
 *     returned and execution simply continues after the macro.
 *
 *     The expansion is a brace-enclosed block rather than a single
 *     statement, so a trailing ';' at the call site is optional.
 *     Note that `status` is evaluated twice when it is a failure.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
38
/*
 * TEST_ASSERT(expr)
 *     Logs a test error, tagged with the invoking file and line and the
 *     source text of `expr`, when `expr` compares equal to FALSE.
 *     Execution continues after the macro either way.
 *
 *     The expansion is a brace-enclosed block rather than a single
 *     statement, so a trailing ';' at the call site is optional.
 */
#define TEST_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
41
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *
 *         Both macros use two variables that the enclosing function must
 *         already have declared:
 *              status:      a UErrorCode; reset to U_ZERO_ERROR by TEST_SETUP.
 *              re:          a (URegularExpression *); opened by TEST_SETUP,
 *                           closed by TEST_TEARDOWN.
 *
 *         Parameters to TEST_SETUP:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         TEST_SETUP opens a brace block followed by an
 *         "if (U_SUCCESS(status))" block, so the test code is skipped when the
 *         pattern cannot be compiled, the text cannot be set, or the UChar
 *         copy of testString cannot be allocated (reported as
 *         U_MEMORY_ALLOCATION_ERROR unless an earlier failure is already
 *         recorded).  TEST_TEARDOWN closes both blocks, reports any failure
 *         left in status, and releases the regex and the UChar copy.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString,  strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
70
71
72/**
73 * @param expected utf-8 array of bytes to be expected
74 */
75static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
76     char     buf_inside_macro[120];
77     int32_t  len = (int32_t)strlen(expected);
78     UBool    success;
79     if (nulTerm) {
80         u_austrncpy(buf_inside_macro, (actual), len+1);
81         buf_inside_macro[len+2] = 0;
82         success = (strcmp((expected), buf_inside_macro) == 0);
83     } else {
84         u_austrncpy(buf_inside_macro, (actual), len);
85         buf_inside_macro[len+1] = 0;
86         success = (strncmp((expected), buf_inside_macro, len) == 0);
87     }
88     if (success == FALSE) {
89         log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
90             file, line, (expected), buf_inside_macro);
91     }
92}
93
94#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
95
96
97static UBool equals_utf8_utext(const char *utf8, UText *utext) {
98    int32_t u8i = 0;
99    UChar32 u8c = 0;
100    UChar32 utc = 0;
101    UBool   stringsEqual = TRUE;
102    utext_setNativeIndex(utext, 0);
103    for (;;) {
104        U8_NEXT_UNSAFE(utf8, u8i, u8c);
105        utc = utext_next32(utext);
106        if (u8c == 0 && utc == U_SENTINEL) {
107            break;
108        }
109        if (u8c != utc || u8c == 0) {
110            stringsEqual = FALSE;
111            break;
112        }
113    }
114    return stringsEqual;
115}
116
117
118static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
119    utext_setNativeIndex(actual, 0);
120    if (!equals_utf8_utext(expected, actual)) {
121        UChar32 c;
122        log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
123        c = utext_next32From(actual, 0);
124        while (c != U_SENTINEL) {
125            if (0x20<c && c <0x7e) {
126                log_err("%c", c);
127            } else {
128                log_err("%#x", c);
129            }
130            c = UTEXT_NEXT32(actual);
131        }
132        log_err("\"\n");
133    }
134}
135
136/*
137 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
138 *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
139 */
140#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
141
142static UBool testUTextEqual(UText *uta, UText *utb) {
143    UChar32 ca = 0;
144    UChar32 cb = 0;
145    utext_setNativeIndex(uta, 0);
146    utext_setNativeIndex(utb, 0);
147    do {
148        ca = utext_next32(uta);
149        cb = utext_next32(utb);
150        if (ca != cb) {
151            break;
152        }
153    } while (ca != U_SENTINEL);
154    return ca == cb;
155}
156
157
158
159
160static void TestRegexCAPI(void);
161static void TestBug4315(void);
162static void TestUTextAPI(void);
163static void TestRefreshInput(void);
164static void TestBug8421(void);
165static void TestBug10815(void);
166
167void addURegexTest(TestNode** root);
168
169void addURegexTest(TestNode** root)
170{
171    addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
172    addTest(root, &TestBug4315,   "regex/TestBug4315");
173    addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
174    addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
175    addTest(root, &TestBug8421,   "regex/TestBug8421");
176    addTest(root, &TestBug10815,   "regex/TestBug10815");
177}
178
179/*
180 * Call back function and context struct used for testing
181 *    regular expression user callbacks.  This test is mostly the same as
182 *   the corresponding C++ test in intltest.
183 */
184typedef struct callBackContext {
185    int32_t          maxCalls;
186    int32_t          numCalls;
187    int32_t          lastSteps;
188} callBackContext;
189
190static UBool U_EXPORT2 U_CALLCONV
191TestCallbackFn(const void *context, int32_t steps) {
192  callBackContext  *info = (callBackContext *)context;
193  if (info->lastSteps+1 != steps) {
194      log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
195  }
196  info->lastSteps = steps;
197  info->numCalls++;
198  return (info->numCalls < info->maxCalls);
199}
200
201/*
202 *   Regular Expression C API Tests
203 */
204static void TestRegexCAPI(void) {
205    UErrorCode           status = U_ZERO_ERROR;
206    URegularExpression  *re;
207    UChar                pat[200];
208    UChar               *minus1;
209
210    memset(&minus1, -1, sizeof(minus1));
211
212    /* Mimimalist open/close */
213    u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
214    re = uregex_open(pat, -1, 0, 0, &status);
215    if (U_FAILURE(status)) {
216         log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
217         return;
218    }
219    uregex_close(re);
220
221    /* Open with all flag values set */
222    status = U_ZERO_ERROR;
223    re = uregex_open(pat, -1,
224        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
225        0, &status);
226    TEST_ASSERT_SUCCESS(status);
227    uregex_close(re);
228
229    /* Open with an invalid flag */
230    status = U_ZERO_ERROR;
231    re = uregex_open(pat, -1, 0x40000000, 0, &status);
232    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
233    uregex_close(re);
234
235    /* Open with an unimplemented flag */
236    status = U_ZERO_ERROR;
237    re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
238    TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
239    uregex_close(re);
240
241    /* openC with an invalid parameter */
242    status = U_ZERO_ERROR;
243    re = uregex_openC(NULL,
244        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
245    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
246
247    /* openC with an invalid parameter */
248    status = U_USELESS_COLLATOR_ERROR;
249    re = uregex_openC(NULL,
250        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
251    TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
252
253    /* openC   open from a C string */
254    {
255        const UChar   *p;
256        int32_t  len;
257        status = U_ZERO_ERROR;
258        re = uregex_openC("abc*", 0, 0, &status);
259        TEST_ASSERT_SUCCESS(status);
260        p = uregex_pattern(re, &len, &status);
261        TEST_ASSERT_SUCCESS(status);
262
263        /* The TEST_ASSERT_SUCCESS above should change too... */
264        if(U_SUCCESS(status)) {
265            u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
266            TEST_ASSERT(u_strcmp(pat, p) == 0);
267            TEST_ASSERT(len==(int32_t)strlen("abc*"));
268        }
269
270        uregex_close(re);
271
272        /*  TODO:  Open with ParseError parameter */
273    }
274
275    /*
276     *  clone
277     */
278    {
279        URegularExpression *clone1;
280        URegularExpression *clone2;
281        URegularExpression *clone3;
282        UChar  testString1[30];
283        UChar  testString2[30];
284        UBool  result;
285
286
287        status = U_ZERO_ERROR;
288        re = uregex_openC("abc*", 0, 0, &status);
289        TEST_ASSERT_SUCCESS(status);
290        clone1 = uregex_clone(re, &status);
291        TEST_ASSERT_SUCCESS(status);
292        TEST_ASSERT(clone1 != NULL);
293
294        status = U_ZERO_ERROR;
295        clone2 = uregex_clone(re, &status);
296        TEST_ASSERT_SUCCESS(status);
297        TEST_ASSERT(clone2 != NULL);
298        uregex_close(re);
299
300        status = U_ZERO_ERROR;
301        clone3 = uregex_clone(clone2, &status);
302        TEST_ASSERT_SUCCESS(status);
303        TEST_ASSERT(clone3 != NULL);
304
305        u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
306        u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
307
308        status = U_ZERO_ERROR;
309        uregex_setText(clone1, testString1, -1, &status);
310        TEST_ASSERT_SUCCESS(status);
311        result = uregex_lookingAt(clone1, 0, &status);
312        TEST_ASSERT_SUCCESS(status);
313        TEST_ASSERT(result==TRUE);
314
315        status = U_ZERO_ERROR;
316        uregex_setText(clone2, testString2, -1, &status);
317        TEST_ASSERT_SUCCESS(status);
318        result = uregex_lookingAt(clone2, 0, &status);
319        TEST_ASSERT_SUCCESS(status);
320        TEST_ASSERT(result==FALSE);
321        result = uregex_find(clone2, 0, &status);
322        TEST_ASSERT_SUCCESS(status);
323        TEST_ASSERT(result==TRUE);
324
325        uregex_close(clone1);
326        uregex_close(clone2);
327        uregex_close(clone3);
328
329    }
330
331    /*
332     *  pattern()
333    */
334    {
335        const UChar  *resultPat;
336        int32_t       resultLen;
337        u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
338        status = U_ZERO_ERROR;
339        re = uregex_open(pat, -1, 0, NULL, &status);
340        resultPat = uregex_pattern(re, &resultLen, &status);
341        TEST_ASSERT_SUCCESS(status);
342
343        /* The TEST_ASSERT_SUCCESS above should change too... */
344        if (U_SUCCESS(status)) {
345            TEST_ASSERT(resultLen == -1);
346            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
347        }
348
349        uregex_close(re);
350
351        status = U_ZERO_ERROR;
352        re = uregex_open(pat, 3, 0, NULL, &status);
353        resultPat = uregex_pattern(re, &resultLen, &status);
354        TEST_ASSERT_SUCCESS(status);
355        TEST_ASSERT_SUCCESS(status);
356
357        /* The TEST_ASSERT_SUCCESS above should change too... */
358        if (U_SUCCESS(status)) {
359            TEST_ASSERT(resultLen == 3);
360            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
361            TEST_ASSERT(u_strlen(resultPat) == 3);
362        }
363
364        uregex_close(re);
365    }
366
367    /*
368     *  flags()
369     */
370    {
371        int32_t  t;
372
373        status = U_ZERO_ERROR;
374        re = uregex_open(pat, -1, 0, NULL, &status);
375        t  = uregex_flags(re, &status);
376        TEST_ASSERT_SUCCESS(status);
377        TEST_ASSERT(t == 0);
378        uregex_close(re);
379
380        status = U_ZERO_ERROR;
381        re = uregex_open(pat, -1, 0, NULL, &status);
382        t  = uregex_flags(re, &status);
383        TEST_ASSERT_SUCCESS(status);
384        TEST_ASSERT(t == 0);
385        uregex_close(re);
386
387        status = U_ZERO_ERROR;
388        re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
389        t  = uregex_flags(re, &status);
390        TEST_ASSERT_SUCCESS(status);
391        TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
392        uregex_close(re);
393    }
394
395    /*
396     *  setText() and lookingAt()
397     */
398    {
399        UChar  text1[50];
400        UChar  text2[50];
401        UBool  result;
402
403        u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
404        u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
405        status = U_ZERO_ERROR;
406        u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
407        re = uregex_open(pat, -1, 0, NULL, &status);
408        TEST_ASSERT_SUCCESS(status);
409
410        /* Operation before doing a setText should fail... */
411        status = U_ZERO_ERROR;
412        uregex_lookingAt(re, 0, &status);
413        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
414
415        status = U_ZERO_ERROR;
416        uregex_setText(re, text1, -1, &status);
417        result = uregex_lookingAt(re, 0, &status);
418        TEST_ASSERT(result == TRUE);
419        TEST_ASSERT_SUCCESS(status);
420
421        status = U_ZERO_ERROR;
422        uregex_setText(re, text2, -1, &status);
423        result = uregex_lookingAt(re, 0, &status);
424        TEST_ASSERT(result == FALSE);
425        TEST_ASSERT_SUCCESS(status);
426
427        status = U_ZERO_ERROR;
428        uregex_setText(re, text1, -1, &status);
429        result = uregex_lookingAt(re, 0, &status);
430        TEST_ASSERT(result == TRUE);
431        TEST_ASSERT_SUCCESS(status);
432
433        status = U_ZERO_ERROR;
434        uregex_setText(re, text1, 5, &status);
435        result = uregex_lookingAt(re, 0, &status);
436        TEST_ASSERT(result == FALSE);
437        TEST_ASSERT_SUCCESS(status);
438
439        status = U_ZERO_ERROR;
440        uregex_setText(re, text1, 6, &status);
441        result = uregex_lookingAt(re, 0, &status);
442        TEST_ASSERT(result == TRUE);
443        TEST_ASSERT_SUCCESS(status);
444
445        uregex_close(re);
446    }
447
448
449    /*
450     *  getText()
451     */
452    {
453        UChar    text1[50];
454        UChar    text2[50];
455        const UChar   *result;
456        int32_t  textLength;
457
458        u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
459        u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
460        status = U_ZERO_ERROR;
461        u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
462        re = uregex_open(pat, -1, 0, NULL, &status);
463
464        uregex_setText(re, text1, -1, &status);
465        result = uregex_getText(re, &textLength, &status);
466        TEST_ASSERT(result == text1);
467        TEST_ASSERT(textLength == -1);
468        TEST_ASSERT_SUCCESS(status);
469
470        status = U_ZERO_ERROR;
471        uregex_setText(re, text2, 7, &status);
472        result = uregex_getText(re, &textLength, &status);
473        TEST_ASSERT(result == text2);
474        TEST_ASSERT(textLength == 7);
475        TEST_ASSERT_SUCCESS(status);
476
477        status = U_ZERO_ERROR;
478        uregex_setText(re, text2, 4, &status);
479        result = uregex_getText(re, &textLength, &status);
480        TEST_ASSERT(result == text2);
481        TEST_ASSERT(textLength == 4);
482        TEST_ASSERT_SUCCESS(status);
483        uregex_close(re);
484    }
485
486    /*
487     *  matches()
488     */
489    {
490        UChar   text1[50];
491        UBool   result;
492        int     len;
493        UChar   nullString[] = {0,0,0};
494
495        u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
496        status = U_ZERO_ERROR;
497        u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
498        re = uregex_open(pat, -1, 0, NULL, &status);
499
500        uregex_setText(re, text1, -1, &status);
501        result = uregex_matches(re, 0, &status);
502        TEST_ASSERT(result == FALSE);
503        TEST_ASSERT_SUCCESS(status);
504
505        status = U_ZERO_ERROR;
506        uregex_setText(re, text1, 6, &status);
507        result = uregex_matches(re, 0, &status);
508        TEST_ASSERT(result == TRUE);
509        TEST_ASSERT_SUCCESS(status);
510
511        status = U_ZERO_ERROR;
512        uregex_setText(re, text1, 6, &status);
513        result = uregex_matches(re, 1, &status);
514        TEST_ASSERT(result == FALSE);
515        TEST_ASSERT_SUCCESS(status);
516        uregex_close(re);
517
518        status = U_ZERO_ERROR;
519        re = uregex_openC(".?", 0, NULL, &status);
520        uregex_setText(re, text1, -1, &status);
521        len = u_strlen(text1);
522        result = uregex_matches(re, len, &status);
523        TEST_ASSERT(result == TRUE);
524        TEST_ASSERT_SUCCESS(status);
525
526        status = U_ZERO_ERROR;
527        uregex_setText(re, nullString, -1, &status);
528        TEST_ASSERT_SUCCESS(status);
529        result = uregex_matches(re, 0, &status);
530        TEST_ASSERT(result == TRUE);
531        TEST_ASSERT_SUCCESS(status);
532        uregex_close(re);
533    }
534
535
536    /*
537     *  lookingAt()    Used in setText test.
538     */
539
540
541    /*
542     *  find(), findNext, start, end, reset
543     */
544    {
545        UChar    text1[50];
546        UBool    result;
547        u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
548        status = U_ZERO_ERROR;
549        re = uregex_openC("rx", 0, NULL, &status);
550
551        uregex_setText(re, text1, -1, &status);
552        result = uregex_find(re, 0, &status);
553        TEST_ASSERT(result == TRUE);
554        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
555        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
556        TEST_ASSERT_SUCCESS(status);
557
558        result = uregex_find(re, 9, &status);
559        TEST_ASSERT(result == TRUE);
560        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
561        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
562        TEST_ASSERT_SUCCESS(status);
563
564        result = uregex_find(re, 14, &status);
565        TEST_ASSERT(result == FALSE);
566        TEST_ASSERT_SUCCESS(status);
567
568        status = U_ZERO_ERROR;
569        uregex_reset(re, 0, &status);
570
571        result = uregex_findNext(re, &status);
572        TEST_ASSERT(result == TRUE);
573        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
574        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
575        TEST_ASSERT_SUCCESS(status);
576
577        result = uregex_findNext(re, &status);
578        TEST_ASSERT(result == TRUE);
579        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
580        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
581        TEST_ASSERT_SUCCESS(status);
582
583        status = U_ZERO_ERROR;
584        uregex_reset(re, 12, &status);
585
586        result = uregex_findNext(re, &status);
587        TEST_ASSERT(result == TRUE);
588        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
589        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
590        TEST_ASSERT_SUCCESS(status);
591
592        result = uregex_findNext(re, &status);
593        TEST_ASSERT(result == FALSE);
594        TEST_ASSERT_SUCCESS(status);
595
596        uregex_close(re);
597    }
598
599    /*
600     *  groupCount
601     */
602    {
603        int32_t result;
604
605        status = U_ZERO_ERROR;
606        re = uregex_openC("abc", 0, NULL, &status);
607        result = uregex_groupCount(re, &status);
608        TEST_ASSERT_SUCCESS(status);
609        TEST_ASSERT(result == 0);
610        uregex_close(re);
611
612        status = U_ZERO_ERROR;
613        re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
614        result = uregex_groupCount(re, &status);
615        TEST_ASSERT_SUCCESS(status);
616        TEST_ASSERT(result == 3);
617        uregex_close(re);
618
619    }
620
621
622    /*
623     *  group()
624     */
625    {
626        UChar    text1[80];
627        UChar    buf[80];
628        UBool    result;
629        int32_t  resultSz;
630        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
631
632        status = U_ZERO_ERROR;
633        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
634        TEST_ASSERT_SUCCESS(status);
635
636
637        uregex_setText(re, text1, -1, &status);
638        result = uregex_find(re, 0, &status);
639        TEST_ASSERT(result==TRUE);
640
641        /*  Capture Group 0, the full match.  Should succeed.  */
642        status = U_ZERO_ERROR;
643        resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
644        TEST_ASSERT_SUCCESS(status);
645        TEST_ASSERT_STRING("abc interior def", buf, TRUE);
646        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
647
648        /*  Capture group #1.  Should succeed. */
649        status = U_ZERO_ERROR;
650        resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
651        TEST_ASSERT_SUCCESS(status);
652        TEST_ASSERT_STRING(" interior ", buf, TRUE);
653        TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
654
655        /*  Capture group out of range.  Error. */
656        status = U_ZERO_ERROR;
657        uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
658        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
659
660        /* NULL buffer, pure pre-flight */
661        status = U_ZERO_ERROR;
662        resultSz = uregex_group(re, 0, NULL, 0, &status);
663        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
664        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
665
666        /* Too small buffer, truncated string */
667        status = U_ZERO_ERROR;
668        memset(buf, -1, sizeof(buf));
669        resultSz = uregex_group(re, 0, buf, 5, &status);
670        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
671        TEST_ASSERT_STRING("abc i", buf, FALSE);
672        TEST_ASSERT(buf[5] == (UChar)0xffff);
673        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
674
675        /* Output string just fits buffer, no NUL term. */
676        status = U_ZERO_ERROR;
677        resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
678        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
679        TEST_ASSERT_STRING("abc interior def", buf, FALSE);
680        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
681        TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
682
683        uregex_close(re);
684
685    }
686
687    /*
688     *  Regions
689     */
690
691
692        /* SetRegion(), getRegion() do something  */
693        TEST_SETUP(".*", "0123456789ABCDEF", 0)
694        UChar resultString[40];
695        TEST_ASSERT(uregex_regionStart(re, &status) == 0);
696        TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
697        uregex_setRegion(re, 3, 6, &status);
698        TEST_ASSERT(uregex_regionStart(re, &status) == 3);
699        TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
700        TEST_ASSERT(uregex_findNext(re, &status));
701        TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
702        TEST_ASSERT_STRING("345", resultString, TRUE);
703        TEST_TEARDOWN;
704
705        /* find(start=-1) uses regions   */
706        TEST_SETUP(".*", "0123456789ABCDEF", 0);
707        uregex_setRegion(re, 4, 6, &status);
708        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
709        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
710        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
711        TEST_TEARDOWN;
712
713        /* find (start >=0) does not use regions   */
714        TEST_SETUP(".*", "0123456789ABCDEF", 0);
715        uregex_setRegion(re, 4, 6, &status);
716        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
717        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
718        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
719        TEST_TEARDOWN;
720
721        /* findNext() obeys regions    */
722        TEST_SETUP(".", "0123456789ABCDEF", 0);
723        uregex_setRegion(re, 4, 6, &status);
724        TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
725        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
726        TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
727        TEST_ASSERT(uregex_start(re, 0, &status) == 5);
728        TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
729        TEST_TEARDOWN;
730
731        /* matches(start=-1) uses regions                                           */
732        /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
733        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
734        uregex_setRegion(re, 4, 6, &status);
735        TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
736        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
737        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
738        TEST_TEARDOWN;
739
740        /* matches (start >=0) does not use regions       */
741        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
742        uregex_setRegion(re, 4, 6, &status);
743        TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
744        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
745        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
746        TEST_TEARDOWN;
747
748        /* lookingAt(start=-1) uses regions                                         */
749        /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
750        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
751        uregex_setRegion(re, 4, 6, &status);
752        TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
753        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
754        TEST_ASSERT(uregex_end(re, 0, &status) == 4);
755        TEST_TEARDOWN;
756
757        /* lookingAt (start >=0) does not use regions  */
758        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
759        uregex_setRegion(re, 4, 6, &status);
760        TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
761        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
762        TEST_ASSERT(uregex_end(re, 0, &status) == 0);
763        TEST_TEARDOWN;
764
765        /* hitEnd()       */
766        TEST_SETUP("[a-f]*", "abcdefghij", 0);
767        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
768        TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
769        TEST_TEARDOWN;
770
771        TEST_SETUP("[a-f]*", "abcdef", 0);
772        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
773        TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
774        TEST_TEARDOWN;
775
776        /* requireEnd   */
777        TEST_SETUP("abcd", "abcd", 0);
778        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
779        TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
780        TEST_TEARDOWN;
781
782        TEST_SETUP("abcd$", "abcd", 0);
783        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
784        TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
785        TEST_TEARDOWN;
786
787        /* anchoringBounds        */
788        TEST_SETUP("abc$", "abcdef", 0);
789        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
790        uregex_useAnchoringBounds(re, FALSE, &status);
791        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
792
793        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
794        uregex_useAnchoringBounds(re, TRUE, &status);
795        uregex_setRegion(re, 0, 3, &status);
796        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
797        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
798        TEST_TEARDOWN;
799
800        /* Transparent Bounds      */
801        TEST_SETUP("abc(?=def)", "abcdef", 0);
802        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
803        uregex_useTransparentBounds(re, TRUE, &status);
804        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
805
806        uregex_useTransparentBounds(re, FALSE, &status);
807        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
808        uregex_setRegion(re, 0, 3, &status);
809        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
810        uregex_useTransparentBounds(re, TRUE, &status);
811        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
812        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
813        TEST_TEARDOWN;
814
815
816    /*
817     *  replaceFirst()
818     */
819    {
820        UChar    text1[80];
821        UChar    text2[80];
822        UChar    replText[80];
823        UChar    buf[80];
824        int32_t  resultSz;
825        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
826        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
827        u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
828
829        status = U_ZERO_ERROR;
830        re = uregex_openC("x(.*?)x", 0, NULL, &status);
831        TEST_ASSERT_SUCCESS(status);
832
833        /*  Normal case, with match */
834        uregex_setText(re, text1, -1, &status);
835        resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
836        TEST_ASSERT_SUCCESS(status);
837        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
838        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
839
840        /* No match.  Text should copy to output with no changes.  */
841        status = U_ZERO_ERROR;
842        uregex_setText(re, text2, -1, &status);
843        resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
844        TEST_ASSERT_SUCCESS(status);
845        TEST_ASSERT_STRING("No match here.", buf, TRUE);
846        TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
847
848        /*  Match, output just fills buffer, no termination warning. */
849        status = U_ZERO_ERROR;
850        uregex_setText(re, text1, -1, &status);
851        memset(buf, -1, sizeof(buf));
852        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
853        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
854        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
855        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
856        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
857
858        /* Do the replaceFirst again, without first resetting anything.
859         *  Should give the same results.
860         */
861        status = U_ZERO_ERROR;
862        memset(buf, -1, sizeof(buf));
863        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
864        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
865        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
866        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
867        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
868
869        /* NULL buffer, zero buffer length */
870        status = U_ZERO_ERROR;
871        resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
872        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
873        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
874
875        /* Buffer too small by one */
876        status = U_ZERO_ERROR;
877        memset(buf, -1, sizeof(buf));
878        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
879        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
880        TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
881        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
882        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
883
884        uregex_close(re);
885    }
886
887
888    /*
889     *  replaceAll()
890     */
891    {
892        UChar    text1[80];          /*  "Replace xaax x1x x...x." */
893        UChar    text2[80];          /*  "No match Here"           */
894        UChar    replText[80];       /*  "<$1>"                    */
895        UChar    replText2[80];      /*  "<<$1>>"                  */
896        const char * pattern = "x(.*?)x";
897        const char * expectedResult = "Replace <aa> <1> <...>.";
898        const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
899        UChar    buf[80];
900        int32_t  resultSize;
901        int32_t  expectedResultSize;
902        int32_t  expectedResultSize2;
903        int32_t  i;
904
905        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
906        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
907        u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
908        u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
909        expectedResultSize = strlen(expectedResult);
910        expectedResultSize2 = strlen(expectedResult2);
911
912        status = U_ZERO_ERROR;
913        re = uregex_openC(pattern, 0, NULL, &status);
914        TEST_ASSERT_SUCCESS(status);
915
916        /*  Normal case, with match */
917        uregex_setText(re, text1, -1, &status);
918        resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
919        TEST_ASSERT_SUCCESS(status);
920        TEST_ASSERT_STRING(expectedResult, buf, TRUE);
921        TEST_ASSERT(resultSize == expectedResultSize);
922
923        /* No match.  Text should copy to output with no changes.  */
924        status = U_ZERO_ERROR;
925        uregex_setText(re, text2, -1, &status);
926        resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
927        TEST_ASSERT_SUCCESS(status);
928        TEST_ASSERT_STRING("No match here.", buf, TRUE);
929        TEST_ASSERT(resultSize == u_strlen(text2));
930
931        /*  Match, output just fills buffer, no termination warning. */
932        status = U_ZERO_ERROR;
933        uregex_setText(re, text1, -1, &status);
934        memset(buf, -1, sizeof(buf));
935        resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
936        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
937        TEST_ASSERT_STRING(expectedResult, buf, FALSE);
938        TEST_ASSERT(resultSize == expectedResultSize);
939        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
940
        /* Do the replaceAll again, without first resetting anything.
942         *  Should give the same results.
943         */
944        status = U_ZERO_ERROR;
945        memset(buf, -1, sizeof(buf));
946        resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
947        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
948        TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
949        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
950        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
951
952        /* NULL buffer, zero buffer length */
953        status = U_ZERO_ERROR;
954        resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
955        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
956        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
957
958        /* Buffer too small.  Try every size, which will tickle edge cases
959         * in uregex_appendReplacement (used by replaceAll)   */
960        for (i=0; i<expectedResultSize; i++) {
961            char  expected[80];
962            status = U_ZERO_ERROR;
963            memset(buf, -1, sizeof(buf));
964            resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
965            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
966            strcpy(expected, expectedResult);
967            expected[i] = 0;
968            TEST_ASSERT_STRING(expected, buf, FALSE);
969            TEST_ASSERT(resultSize == expectedResultSize);
970            TEST_ASSERT(buf[i] == (UChar)0xffff);
971        }
972
973        /* Buffer too small.  Same as previous test, except this time the replacement
974         * text is longer than the match capture group, making the length of the complete
975         * replacement longer than the original string.
976         */
977        for (i=0; i<expectedResultSize2; i++) {
978            char  expected[80];
979            status = U_ZERO_ERROR;
980            memset(buf, -1, sizeof(buf));
981            resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
982            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
983            strcpy(expected, expectedResult2);
984            expected[i] = 0;
985            TEST_ASSERT_STRING(expected, buf, FALSE);
986            TEST_ASSERT(resultSize == expectedResultSize2);
987            TEST_ASSERT(buf[i] == (UChar)0xffff);
988        }
989
990
991        uregex_close(re);
992    }
993
994
995    /*
996     *  appendReplacement()
997     */
998    {
999        UChar    text[100];
1000        UChar    repl[100];
1001        UChar    buf[100];
1002        UChar   *bufPtr;
1003        int32_t  bufCap;
1004
1005
1006        status = U_ZERO_ERROR;
1007        re = uregex_openC(".*", 0, 0, &status);
1008        TEST_ASSERT_SUCCESS(status);
1009
1010        u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1011        u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1012        uregex_setText(re, text, -1, &status);
1013
1014        /* match covers whole target string */
1015        uregex_find(re, 0, &status);
1016        TEST_ASSERT_SUCCESS(status);
1017        bufPtr = buf;
1018        bufCap = UPRV_LENGTHOF(buf);
1019        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1020        TEST_ASSERT_SUCCESS(status);
1021        TEST_ASSERT_STRING("some other", buf, TRUE);
1022
1023        /* Match has \u \U escapes */
1024        uregex_find(re, 0, &status);
1025        TEST_ASSERT_SUCCESS(status);
1026        bufPtr = buf;
1027        bufCap = UPRV_LENGTHOF(buf);
1028        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1029        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1030        TEST_ASSERT_SUCCESS(status);
1031        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1032
1033        /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1034        status = U_ZERO_ERROR;
1035        uregex_find(re, 0, &status);
1036        TEST_ASSERT_SUCCESS(status);
1037        bufPtr = buf;
1038        status = U_BUFFER_OVERFLOW_ERROR;
1039        uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1040        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1041
1042        uregex_close(re);
1043    }
1044
1045
1046    /*
1047     *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1048     */
1049
1050    /*
1051     *  split()
1052     */
1053    {
1054        UChar    textToSplit[80];
1055        UChar    text2[80];
1056        UChar    buf[200];
1057        UChar    *fields[10];
1058        int32_t  numFields;
1059        int32_t  requiredCapacity;
1060        int32_t  spaceNeeded;
1061        int32_t  sz;
1062
1063        u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1064        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1065
1066        status = U_ZERO_ERROR;
1067        re = uregex_openC(":", 0, NULL, &status);
1068
1069
1070        /*  Simple split */
1071
1072        uregex_setText(re, textToSplit, -1, &status);
1073        TEST_ASSERT_SUCCESS(status);
1074
1075        /* The TEST_ASSERT_SUCCESS call above should change too... */
1076        if (U_SUCCESS(status)) {
1077            memset(fields, -1, sizeof(fields));
1078            numFields =
1079                uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1080            TEST_ASSERT_SUCCESS(status);
1081
1082            /* The TEST_ASSERT_SUCCESS call above should change too... */
1083            if(U_SUCCESS(status)) {
1084                TEST_ASSERT(numFields == 3);
1085                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1086                TEST_ASSERT_STRING(" second", fields[1], TRUE);
1087                TEST_ASSERT_STRING("  third", fields[2], TRUE);
1088                TEST_ASSERT(fields[3] == NULL);
1089
1090                spaceNeeded = u_strlen(textToSplit) -
1091                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1092                            numFields;          /* Each field gets a NUL terminator */
1093
1094                TEST_ASSERT(spaceNeeded == requiredCapacity);
1095            }
1096        }
1097
1098        uregex_close(re);
1099
1100
1101        /*  Split with too few output strings available */
1102        status = U_ZERO_ERROR;
1103        re = uregex_openC(":", 0, NULL, &status);
1104        uregex_setText(re, textToSplit, -1, &status);
1105        TEST_ASSERT_SUCCESS(status);
1106
1107        /* The TEST_ASSERT_SUCCESS call above should change too... */
1108        if(U_SUCCESS(status)) {
1109            memset(fields, -1, sizeof(fields));
1110            numFields =
1111                uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1112            TEST_ASSERT_SUCCESS(status);
1113
1114            /* The TEST_ASSERT_SUCCESS call above should change too... */
1115            if(U_SUCCESS(status)) {
1116                TEST_ASSERT(numFields == 2);
1117                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1118                TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1119                TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1120
1121                spaceNeeded = u_strlen(textToSplit) -
1122                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1123                            numFields;          /* Each field gets a NUL terminator */
1124
1125                TEST_ASSERT(spaceNeeded == requiredCapacity);
1126
1127                /* Split with a range of output buffer sizes.  */
1128                spaceNeeded = u_strlen(textToSplit) -
1129                    (numFields - 1)  +  /* Field delimiters do not appear in output */
1130                    numFields;          /* Each field gets a NUL terminator */
1131
1132                for (sz=0; sz < spaceNeeded+1; sz++) {
1133                    memset(fields, -1, sizeof(fields));
1134                    status = U_ZERO_ERROR;
1135                    numFields =
1136                        uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1137                    if (sz >= spaceNeeded) {
1138                        TEST_ASSERT_SUCCESS(status);
1139                        TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1140                        TEST_ASSERT_STRING(" second", fields[1], TRUE);
1141                        TEST_ASSERT_STRING("  third", fields[2], TRUE);
1142                    } else {
1143                        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1144                    }
1145                    TEST_ASSERT(numFields == 3);
1146                    TEST_ASSERT(fields[3] == NULL);
1147                    TEST_ASSERT(spaceNeeded == requiredCapacity);
1148                }
1149            }
1150        }
1151
1152        uregex_close(re);
1153    }
1154
1155
1156
1157
1158    /* Split(), part 2.  Patterns with capture groups.  The capture group text
1159     *                   comes out as additional fields.  */
1160    {
1161        UChar    textToSplit[80];
1162        UChar    buf[200];
1163        UChar    *fields[10];
1164        int32_t  numFields;
1165        int32_t  requiredCapacity;
1166        int32_t  spaceNeeded;
1167        int32_t  sz;
1168
1169        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1170
1171        status = U_ZERO_ERROR;
1172        re = uregex_openC("<(.*?)>", 0, NULL, &status);
1173
1174        uregex_setText(re, textToSplit, -1, &status);
1175        TEST_ASSERT_SUCCESS(status);
1176
1177        /* The TEST_ASSERT_SUCCESS call above should change too... */
1178        if(U_SUCCESS(status)) {
1179            memset(fields, -1, sizeof(fields));
1180            numFields =
1181                uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1182            TEST_ASSERT_SUCCESS(status);
1183
1184            /* The TEST_ASSERT_SUCCESS call above should change too... */
1185            if(U_SUCCESS(status)) {
1186                TEST_ASSERT(numFields == 5);
1187                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1188                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1189                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1190                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1191                TEST_ASSERT_STRING("  third", fields[4], TRUE);
1192                TEST_ASSERT(fields[5] == NULL);
1193                spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1194                TEST_ASSERT(spaceNeeded == requiredCapacity);
1195            }
1196        }
1197
1198        /*  Split with too few output strings available (2) */
1199        status = U_ZERO_ERROR;
1200        memset(fields, -1, sizeof(fields));
1201        numFields =
1202            uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1203        TEST_ASSERT_SUCCESS(status);
1204
1205        /* The TEST_ASSERT_SUCCESS call above should change too... */
1206        if(U_SUCCESS(status)) {
1207            TEST_ASSERT(numFields == 2);
1208            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1209            TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1210            TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1211
1212            spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1213            TEST_ASSERT(spaceNeeded == requiredCapacity);
1214        }
1215
1216        /*  Split with too few output strings available (3) */
1217        status = U_ZERO_ERROR;
1218        memset(fields, -1, sizeof(fields));
1219        numFields =
1220            uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1221        TEST_ASSERT_SUCCESS(status);
1222
1223        /* The TEST_ASSERT_SUCCESS call above should change too... */
1224        if(U_SUCCESS(status)) {
1225            TEST_ASSERT(numFields == 3);
1226            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1227            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1228            TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1229            TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1230
1231            spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1232            TEST_ASSERT(spaceNeeded == requiredCapacity);
1233        }
1234
1235        /*  Split with just enough output strings available (5) */
1236        status = U_ZERO_ERROR;
1237        memset(fields, -1, sizeof(fields));
1238        numFields =
1239            uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1240        TEST_ASSERT_SUCCESS(status);
1241
1242        /* The TEST_ASSERT_SUCCESS call above should change too... */
1243        if(U_SUCCESS(status)) {
1244            TEST_ASSERT(numFields == 5);
1245            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1246            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1247            TEST_ASSERT_STRING(" second", fields[2], TRUE);
1248            TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1249            TEST_ASSERT_STRING("  third", fields[4], TRUE);
1250            TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1251
1252            spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1253            TEST_ASSERT(spaceNeeded == requiredCapacity);
1254        }
1255
1256        /* Split, end of text is a field delimiter.   */
1257        status = U_ZERO_ERROR;
1258        sz = strlen("first <tag-a> second<tag-b>");
1259        uregex_setText(re, textToSplit, sz, &status);
1260        TEST_ASSERT_SUCCESS(status);
1261
1262        /* The TEST_ASSERT_SUCCESS call above should change too... */
1263        if(U_SUCCESS(status)) {
1264            memset(fields, -1, sizeof(fields));
1265            numFields =
1266                uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1267            TEST_ASSERT_SUCCESS(status);
1268
1269            /* The TEST_ASSERT_SUCCESS call above should change too... */
1270            if(U_SUCCESS(status)) {
1271                TEST_ASSERT(numFields == 5);
1272                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1273                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1274                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1275                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1276                TEST_ASSERT_STRING("",        fields[4], TRUE);
1277                TEST_ASSERT(fields[5] == NULL);
1278                TEST_ASSERT(fields[8] == NULL);
1279                TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1280                spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1281                TEST_ASSERT(spaceNeeded == requiredCapacity);
1282            }
1283        }
1284
1285        uregex_close(re);
1286    }
1287
1288    /*
1289     * set/getTimeLimit
1290     */
1291     TEST_SETUP("abc$", "abcdef", 0);
1292     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1293     uregex_setTimeLimit(re, 1000, &status);
1294     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1295     TEST_ASSERT_SUCCESS(status);
1296     uregex_setTimeLimit(re, -1, &status);
1297     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1298     status = U_ZERO_ERROR;
1299     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1300     TEST_TEARDOWN;
1301
1302     /*
1303      * set/get Stack Limit
1304      */
1305     TEST_SETUP("abc$", "abcdef", 0);
1306     TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1307     uregex_setStackLimit(re, 40000, &status);
1308     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1309     TEST_ASSERT_SUCCESS(status);
1310     uregex_setStackLimit(re, -1, &status);
1311     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1312     status = U_ZERO_ERROR;
1313     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1314     TEST_TEARDOWN;
1315
1316
1317     /*
1318      * Get/Set callback functions
1319      *     This test is copied from intltest regex/Callbacks
1320      *     The pattern and test data will run long enough to cause the callback
1321      *       to be invoked.  The nested '+' operators give exponential time
1322      *       behavior with increasing string length.
1323      */
1324     TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1325     callBackContext cbInfo = {4, 0, 0};
1326     const void     *pContext   = &cbInfo;
1327     URegexMatchCallback    *returnedFn = &TestCallbackFn;
1328
1329     /*  Getting the callback fn when it hasn't been set must return NULL  */
1330     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1331     TEST_ASSERT_SUCCESS(status);
1332     TEST_ASSERT(returnedFn == NULL);
1333     TEST_ASSERT(pContext == NULL);
1334
     /* Set the callback and do a match.                                  */
1336     /* The callback function should record that it has been called.      */
1337     uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1338     TEST_ASSERT_SUCCESS(status);
1339     TEST_ASSERT(cbInfo.numCalls == 0);
1340     TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1341     TEST_ASSERT_SUCCESS(status);
1342     TEST_ASSERT(cbInfo.numCalls > 0);
1343
1344     /* Getting the callback should return the values that were set above.  */
1345     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1346     TEST_ASSERT(returnedFn == &TestCallbackFn);
1347     TEST_ASSERT(pContext == &cbInfo);
1348
1349     TEST_TEARDOWN;
1350}
1351
1352
1353
1354static void TestBug4315(void) {
1355    UErrorCode      theICUError = U_ZERO_ERROR;
1356    URegularExpression *theRegEx;
1357    UChar           *textBuff;
1358    const char      *thePattern;
1359    UChar            theString[100];
1360    UChar           *destFields[24];
1361    int32_t         neededLength1;
1362    int32_t         neededLength2;
1363
1364    int32_t         wordCount = 0;
1365    int32_t         destFieldsSize = 24;
1366
1367    thePattern  = "ck ";
1368    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1369
1370    /* open a regex */
1371    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1372    TEST_ASSERT_SUCCESS(theICUError);
1373
1374    /* set the input string */
1375    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1376    TEST_ASSERT_SUCCESS(theICUError);
1377
1378    /* split */
1379    /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1380     *  error occurs! */
1381    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1382        destFieldsSize, &theICUError);
1383
1384    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1385    TEST_ASSERT(wordCount==3);
1386
1387    if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1388    {
1389        theICUError = U_ZERO_ERROR;
1390        textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1391        wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1392            destFields, destFieldsSize, &theICUError);
1393        TEST_ASSERT(wordCount==3);
1394        TEST_ASSERT_SUCCESS(theICUError);
1395        TEST_ASSERT(neededLength1 == neededLength2);
1396        TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1397        TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1398        TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1399        TEST_ASSERT(destFields[3] == NULL);
1400        free(textBuff);
1401    }
1402    uregex_close(theRegEx);
1403}
1404
1405/* Based on TestRegexCAPI() */
1406static void TestUTextAPI(void) {
1407    UErrorCode           status = U_ZERO_ERROR;
1408    URegularExpression  *re;
1409    UText                patternText = UTEXT_INITIALIZER;
1410    UChar                pat[200];
1411    const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1412
    /* Minimalist open/close */
1414    utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1415    re = uregex_openUText(&patternText, 0, 0, &status);
1416    if (U_FAILURE(status)) {
1417         log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1418         utext_close(&patternText);
1419         return;
1420    }
1421    uregex_close(re);
1422
1423    /* Open with all flag values set */
1424    status = U_ZERO_ERROR;
1425    re = uregex_openUText(&patternText,
1426        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1427        0, &status);
1428    TEST_ASSERT_SUCCESS(status);
1429    uregex_close(re);
1430
1431    /* Open with an invalid flag */
1432    status = U_ZERO_ERROR;
1433    re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1434    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1435    uregex_close(re);
1436
1437    /* open with an invalid parameter */
1438    status = U_ZERO_ERROR;
1439    re = uregex_openUText(NULL,
1440        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1441    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1442
1443    /*
1444     *  clone
1445     */
1446    {
1447        URegularExpression *clone1;
1448        URegularExpression *clone2;
1449        URegularExpression *clone3;
1450        UChar  testString1[30];
1451        UChar  testString2[30];
1452        UBool  result;
1453
1454
1455        status = U_ZERO_ERROR;
1456        re = uregex_openUText(&patternText, 0, 0, &status);
1457        TEST_ASSERT_SUCCESS(status);
1458        clone1 = uregex_clone(re, &status);
1459        TEST_ASSERT_SUCCESS(status);
1460        TEST_ASSERT(clone1 != NULL);
1461
1462        status = U_ZERO_ERROR;
1463        clone2 = uregex_clone(re, &status);
1464        TEST_ASSERT_SUCCESS(status);
1465        TEST_ASSERT(clone2 != NULL);
1466        uregex_close(re);
1467
1468        status = U_ZERO_ERROR;
1469        clone3 = uregex_clone(clone2, &status);
1470        TEST_ASSERT_SUCCESS(status);
1471        TEST_ASSERT(clone3 != NULL);
1472
1473        u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1474        u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1475
1476        status = U_ZERO_ERROR;
1477        uregex_setText(clone1, testString1, -1, &status);
1478        TEST_ASSERT_SUCCESS(status);
1479        result = uregex_lookingAt(clone1, 0, &status);
1480        TEST_ASSERT_SUCCESS(status);
1481        TEST_ASSERT(result==TRUE);
1482
1483        status = U_ZERO_ERROR;
1484        uregex_setText(clone2, testString2, -1, &status);
1485        TEST_ASSERT_SUCCESS(status);
1486        result = uregex_lookingAt(clone2, 0, &status);
1487        TEST_ASSERT_SUCCESS(status);
1488        TEST_ASSERT(result==FALSE);
1489        result = uregex_find(clone2, 0, &status);
1490        TEST_ASSERT_SUCCESS(status);
1491        TEST_ASSERT(result==TRUE);
1492
1493        uregex_close(clone1);
1494        uregex_close(clone2);
1495        uregex_close(clone3);
1496
1497    }
1498
1499    /*
1500     *  pattern() and patternText()
1501     */
1502    {
1503        const UChar  *resultPat;
1504        int32_t       resultLen;
1505        UText        *resultText;
1506        const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1507        const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1508        u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1509        status = U_ZERO_ERROR;
1510
1511        utext_openUTF8(&patternText, str_hello, -1, &status);
1512        re = uregex_open(pat, -1, 0, NULL, &status);
1513        resultPat = uregex_pattern(re, &resultLen, &status);
1514        TEST_ASSERT_SUCCESS(status);
1515
1516        /* The TEST_ASSERT_SUCCESS above should change too... */
1517        if (U_SUCCESS(status)) {
1518            TEST_ASSERT(resultLen == -1);
1519            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1520        }
1521
1522        resultText = uregex_patternUText(re, &status);
1523        TEST_ASSERT_SUCCESS(status);
1524        TEST_ASSERT_UTEXT(str_hello, resultText);
1525
1526        uregex_close(re);
1527
1528        status = U_ZERO_ERROR;
1529        re = uregex_open(pat, 3, 0, NULL, &status);
1530        resultPat = uregex_pattern(re, &resultLen, &status);
1531        TEST_ASSERT_SUCCESS(status);
1532
1533        /* The TEST_ASSERT_SUCCESS above should change too... */
1534        if (U_SUCCESS(status)) {
1535            TEST_ASSERT(resultLen == 3);
1536            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1537            TEST_ASSERT(u_strlen(resultPat) == 3);
1538        }
1539
1540        resultText = uregex_patternUText(re, &status);
1541        TEST_ASSERT_SUCCESS(status);
1542        TEST_ASSERT_UTEXT(str_hel, resultText);
1543
1544        uregex_close(re);
1545    }
1546
1547    /*
1548     *  setUText() and lookingAt()
1549     */
1550    {
1551        UText  text1 = UTEXT_INITIALIZER;
1552        UText  text2 = UTEXT_INITIALIZER;
1553        UBool  result;
1554        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1555        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1556        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1557        status = U_ZERO_ERROR;
1558        utext_openUTF8(&text1, str_abcccd, -1, &status);
1559        utext_openUTF8(&text2, str_abcccxd, -1, &status);
1560
1561        utext_openUTF8(&patternText, str_abcd, -1, &status);
1562        re = uregex_openUText(&patternText, 0, NULL, &status);
1563        TEST_ASSERT_SUCCESS(status);
1564
1565        /* Operation before doing a setText should fail... */
1566        status = U_ZERO_ERROR;
1567        uregex_lookingAt(re, 0, &status);
1568        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1569
1570        status = U_ZERO_ERROR;
1571        uregex_setUText(re, &text1, &status);
1572        result = uregex_lookingAt(re, 0, &status);
1573        TEST_ASSERT(result == TRUE);
1574        TEST_ASSERT_SUCCESS(status);
1575
1576        status = U_ZERO_ERROR;
1577        uregex_setUText(re, &text2, &status);
1578        result = uregex_lookingAt(re, 0, &status);
1579        TEST_ASSERT(result == FALSE);
1580        TEST_ASSERT_SUCCESS(status);
1581
1582        status = U_ZERO_ERROR;
1583        uregex_setUText(re, &text1, &status);
1584        result = uregex_lookingAt(re, 0, &status);
1585        TEST_ASSERT(result == TRUE);
1586        TEST_ASSERT_SUCCESS(status);
1587
1588        uregex_close(re);
1589        utext_close(&text1);
1590        utext_close(&text2);
1591    }
1592
1593
1594    /*
1595     *  getText() and getUText()
1596     */
1597    {
1598        UText  text1 = UTEXT_INITIALIZER;
1599        UText  text2 = UTEXT_INITIALIZER;
1600        UChar  text2Chars[20];
1601        UText  *resultText;
1602        const UChar   *result;
1603        int32_t  textLength;
1604        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1605        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1606        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1607
1608
1609        status = U_ZERO_ERROR;
1610        utext_openUTF8(&text1, str_abcccd, -1, &status);
1611        u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1612        utext_openUChars(&text2, text2Chars, -1, &status);
1613
1614        utext_openUTF8(&patternText, str_abcd, -1, &status);
1615        re = uregex_openUText(&patternText, 0, NULL, &status);
1616
1617        /* First set a UText */
1618        uregex_setUText(re, &text1, &status);
1619        resultText = uregex_getUText(re, NULL, &status);
1620        TEST_ASSERT_SUCCESS(status);
1621        TEST_ASSERT(resultText != &text1);
1622        utext_setNativeIndex(resultText, 0);
1623        utext_setNativeIndex(&text1, 0);
1624        TEST_ASSERT(testUTextEqual(resultText, &text1));
1625        utext_close(resultText);
1626
1627        result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1628        (void)result;    /* Suppress set but not used warning. */
1629        TEST_ASSERT(textLength == -1 || textLength == 6);
1630        resultText = uregex_getUText(re, NULL, &status);
1631        TEST_ASSERT_SUCCESS(status);
1632        TEST_ASSERT(resultText != &text1);
1633        utext_setNativeIndex(resultText, 0);
1634        utext_setNativeIndex(&text1, 0);
1635        TEST_ASSERT(testUTextEqual(resultText, &text1));
1636        utext_close(resultText);
1637
1638        /* Then set a UChar * */
1639        uregex_setText(re, text2Chars, 7, &status);
1640        resultText = uregex_getUText(re, NULL, &status);
1641        TEST_ASSERT_SUCCESS(status);
1642        utext_setNativeIndex(resultText, 0);
1643        utext_setNativeIndex(&text2, 0);
1644        TEST_ASSERT(testUTextEqual(resultText, &text2));
1645        utext_close(resultText);
1646        result = uregex_getText(re, &textLength, &status);
1647        TEST_ASSERT(textLength == 7);
1648
1649        uregex_close(re);
1650        utext_close(&text1);
1651        utext_close(&text2);
1652    }
1653
1654    /*
1655     *  matches()
1656     */
1657    {
1658        UText   text1 = UTEXT_INITIALIZER;
1659        UBool   result;
1660        UText   nullText = UTEXT_INITIALIZER;
1661        const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1662        const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1663
1664        status = U_ZERO_ERROR;
1665        utext_openUTF8(&text1, str_abcccde, -1, &status);
1666        utext_openUTF8(&patternText, str_abcd, -1, &status);
1667        re = uregex_openUText(&patternText, 0, NULL, &status);
1668
1669        uregex_setUText(re, &text1, &status);
1670        result = uregex_matches(re, 0, &status);
1671        TEST_ASSERT(result == FALSE);
1672        TEST_ASSERT_SUCCESS(status);
1673        uregex_close(re);
1674
1675        status = U_ZERO_ERROR;
1676        re = uregex_openC(".?", 0, NULL, &status);
1677        uregex_setUText(re, &text1, &status);
1678        result = uregex_matches(re, 7, &status);
1679        TEST_ASSERT(result == TRUE);
1680        TEST_ASSERT_SUCCESS(status);
1681
1682        status = U_ZERO_ERROR;
1683        utext_openUTF8(&nullText, "", -1, &status);
1684        uregex_setUText(re, &nullText, &status);
1685        TEST_ASSERT_SUCCESS(status);
1686        result = uregex_matches(re, 0, &status);
1687        TEST_ASSERT(result == TRUE);
1688        TEST_ASSERT_SUCCESS(status);
1689
1690        uregex_close(re);
1691        utext_close(&text1);
1692        utext_close(&nullText);
1693    }
1694
1695
1696    /*
1697     *  lookingAt()    Used in setText test.
1698     */
1699
1700
1701    /*
1702     *  find(), findNext, start, end, reset
1703     */
1704    {
1705        UChar    text1[50];
1706        UBool    result;
1707        u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1708        status = U_ZERO_ERROR;
1709        re = uregex_openC("rx", 0, NULL, &status);
1710
1711        uregex_setText(re, text1, -1, &status);
1712        result = uregex_find(re, 0, &status);
1713        TEST_ASSERT(result == TRUE);
1714        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1715        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1716        TEST_ASSERT_SUCCESS(status);
1717
1718        result = uregex_find(re, 9, &status);
1719        TEST_ASSERT(result == TRUE);
1720        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1721        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1722        TEST_ASSERT_SUCCESS(status);
1723
1724        result = uregex_find(re, 14, &status);
1725        TEST_ASSERT(result == FALSE);
1726        TEST_ASSERT_SUCCESS(status);
1727
1728        status = U_ZERO_ERROR;
1729        uregex_reset(re, 0, &status);
1730
1731        result = uregex_findNext(re, &status);
1732        TEST_ASSERT(result == TRUE);
1733        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1734        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1735        TEST_ASSERT_SUCCESS(status);
1736
1737        result = uregex_findNext(re, &status);
1738        TEST_ASSERT(result == TRUE);
1739        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1740        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1741        TEST_ASSERT_SUCCESS(status);
1742
1743        status = U_ZERO_ERROR;
1744        uregex_reset(re, 12, &status);
1745
1746        result = uregex_findNext(re, &status);
1747        TEST_ASSERT(result == TRUE);
1748        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1749        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1750        TEST_ASSERT_SUCCESS(status);
1751
1752        result = uregex_findNext(re, &status);
1753        TEST_ASSERT(result == FALSE);
1754        TEST_ASSERT_SUCCESS(status);
1755
1756        uregex_close(re);
1757    }
1758
1759    /*
1760     *  groupUText()
1761     */
1762    {
1763        UChar    text1[80];
1764        UText   *actual;
1765        UBool    result;
1766        int64_t  groupLen = 0;
1767        UChar    groupBuf[20];
1768
1769        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1770
1771        status = U_ZERO_ERROR;
1772        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1773        TEST_ASSERT_SUCCESS(status);
1774
1775        uregex_setText(re, text1, -1, &status);
1776        result = uregex_find(re, 0, &status);
1777        TEST_ASSERT(result==TRUE);
1778
1779        /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1780        status = U_ZERO_ERROR;
1781        actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1782        TEST_ASSERT_SUCCESS(status);
1783
1784        TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1785        TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1786        utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1787
1788        TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1789        utext_close(actual);
1790
1791        /*  Capture group #1.  Should succeed. */
1792        status = U_ZERO_ERROR;
1793
1794        actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1795        TEST_ASSERT_SUCCESS(status);
1796        TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1797                                                           /*    (within the string text1)           */
1798        TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1799        utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1800        TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1801
1802        utext_close(actual);
1803
1804        /*  Capture group out of range.  Error. */
1805        status = U_ZERO_ERROR;
1806        actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1807        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1808        utext_close(actual);
1809
1810        uregex_close(re);
1811    }
1812
1813    /*
1814     *  replaceFirst()
1815     */
1816    {
1817        UChar    text1[80];
1818        UChar    text2[80];
1819        UText    replText = UTEXT_INITIALIZER;
1820        UText   *result;
1821        const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1822        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1823        const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1824               0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1825        const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1826        const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1827        status = U_ZERO_ERROR;
1828        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1829        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1830        utext_openUTF8(&replText, str_1x, -1, &status);
1831
1832        re = uregex_openC("x(.*?)x", 0, NULL, &status);
1833        TEST_ASSERT_SUCCESS(status);
1834
1835        /*  Normal case, with match */
1836        uregex_setText(re, text1, -1, &status);
1837        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1838        TEST_ASSERT_SUCCESS(status);
1839        TEST_ASSERT_UTEXT(str_Replxxx, result);
1840        utext_close(result);
1841
1842        /* No match.  Text should copy to output with no changes.  */
1843        uregex_setText(re, text2, -1, &status);
1844        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1845        TEST_ASSERT_SUCCESS(status);
1846        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1847        utext_close(result);
1848
1849        /* Unicode escapes */
1850        uregex_setText(re, text1, -1, &status);
1851        utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1852        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1853        TEST_ASSERT_SUCCESS(status);
1854        TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1855        utext_close(result);
1856
1857        uregex_close(re);
1858        utext_close(&replText);
1859    }
1860
1861
1862    /*
1863     *  replaceAll()
1864     */
1865    {
1866        UChar    text1[80];
1867        UChar    text2[80];
1868        UText    replText = UTEXT_INITIALIZER;
1869        UText   *result;
1870        const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1871        const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1872        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1873        status = U_ZERO_ERROR;
1874        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1875        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1876        utext_openUTF8(&replText, str_1, -1, &status);
1877
1878        re = uregex_openC("x(.*?)x", 0, NULL, &status);
1879        TEST_ASSERT_SUCCESS(status);
1880
1881        /*  Normal case, with match */
1882        uregex_setText(re, text1, -1, &status);
1883        result = uregex_replaceAllUText(re, &replText, NULL, &status);
1884        TEST_ASSERT_SUCCESS(status);
1885        TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1886        utext_close(result);
1887
1888        /* No match.  Text should copy to output with no changes.  */
1889        uregex_setText(re, text2, -1, &status);
1890        result = uregex_replaceAllUText(re, &replText, NULL, &status);
1891        TEST_ASSERT_SUCCESS(status);
1892        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1893        utext_close(result);
1894
1895        uregex_close(re);
1896        utext_close(&replText);
1897    }
1898
1899
1900    /*
1901     *  appendReplacement()
1902     */
1903    {
1904        UChar    text[100];
1905        UChar    repl[100];
1906        UChar    buf[100];
1907        UChar   *bufPtr;
1908        int32_t  bufCap;
1909
1910        status = U_ZERO_ERROR;
1911        re = uregex_openC(".*", 0, 0, &status);
1912        TEST_ASSERT_SUCCESS(status);
1913
1914        u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1915        u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1916        uregex_setText(re, text, -1, &status);
1917
1918        /* match covers whole target string */
1919        uregex_find(re, 0, &status);
1920        TEST_ASSERT_SUCCESS(status);
1921        bufPtr = buf;
1922        bufCap = UPRV_LENGTHOF(buf);
1923        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1924        TEST_ASSERT_SUCCESS(status);
1925        TEST_ASSERT_STRING("some other", buf, TRUE);
1926
1927        /* Match has \u \U escapes */
1928        uregex_find(re, 0, &status);
1929        TEST_ASSERT_SUCCESS(status);
1930        bufPtr = buf;
1931        bufCap = UPRV_LENGTHOF(buf);
1932        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1933        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1934        TEST_ASSERT_SUCCESS(status);
1935        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1936
1937        uregex_close(re);
1938    }
1939
1940
1941    /*
1942     *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1943     */
1944
1945    /*
1946     *  splitUText()
1947     */
1948    {
1949        UChar    textToSplit[80];
1950        UChar    text2[80];
1951        UText    *fields[10];
1952        int32_t  numFields;
1953        int32_t i;
1954
1955        u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1956        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1957
1958        status = U_ZERO_ERROR;
1959        re = uregex_openC(":", 0, NULL, &status);
1960
1961
1962        /*  Simple split */
1963
1964        uregex_setText(re, textToSplit, -1, &status);
1965        TEST_ASSERT_SUCCESS(status);
1966
1967        /* The TEST_ASSERT_SUCCESS call above should change too... */
1968        if (U_SUCCESS(status)) {
1969            memset(fields, 0, sizeof(fields));
1970            numFields = uregex_splitUText(re, fields, 10, &status);
1971            TEST_ASSERT_SUCCESS(status);
1972
1973            /* The TEST_ASSERT_SUCCESS call above should change too... */
1974            if(U_SUCCESS(status)) {
1975              const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1976              const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1977              const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1978                TEST_ASSERT(numFields == 3);
1979                TEST_ASSERT_UTEXT(str_first,  fields[0]);
1980                TEST_ASSERT_UTEXT(str_second, fields[1]);
1981                TEST_ASSERT_UTEXT(str_third, fields[2]);
1982                TEST_ASSERT(fields[3] == NULL);
1983            }
1984            for(i = 0; i < numFields; i++) {
1985                utext_close(fields[i]);
1986            }
1987        }
1988
1989        uregex_close(re);
1990
1991
1992        /*  Split with too few output strings available */
1993        status = U_ZERO_ERROR;
1994        re = uregex_openC(":", 0, NULL, &status);
1995        uregex_setText(re, textToSplit, -1, &status);
1996        TEST_ASSERT_SUCCESS(status);
1997
1998        /* The TEST_ASSERT_SUCCESS call above should change too... */
1999        if(U_SUCCESS(status)) {
2000            fields[0] = NULL;
2001            fields[1] = NULL;
2002            fields[2] = &patternText;
2003            numFields = uregex_splitUText(re, fields, 2, &status);
2004            TEST_ASSERT_SUCCESS(status);
2005
2006            /* The TEST_ASSERT_SUCCESS call above should change too... */
2007            if(U_SUCCESS(status)) {
2008                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2009                const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2010                TEST_ASSERT(numFields == 2);
2011                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2012                TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2013                TEST_ASSERT(fields[2] == &patternText);
2014            }
2015            for(i = 0; i < numFields; i++) {
2016                utext_close(fields[i]);
2017            }
2018        }
2019
2020        uregex_close(re);
2021    }
2022
2023    /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2024     *                   comes out as additional fields.  */
2025    {
2026        UChar    textToSplit[80];
2027        UText    *fields[10];
2028        int32_t  numFields;
2029        int32_t i;
2030
2031        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2032
2033        status = U_ZERO_ERROR;
2034        re = uregex_openC("<(.*?)>", 0, NULL, &status);
2035
2036        uregex_setText(re, textToSplit, -1, &status);
2037        TEST_ASSERT_SUCCESS(status);
2038
2039        /* The TEST_ASSERT_SUCCESS call above should change too... */
2040        if(U_SUCCESS(status)) {
2041            memset(fields, 0, sizeof(fields));
2042            numFields = uregex_splitUText(re, fields, 10, &status);
2043            TEST_ASSERT_SUCCESS(status);
2044
2045            /* The TEST_ASSERT_SUCCESS call above should change too... */
2046            if(U_SUCCESS(status)) {
2047                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2048                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2049                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2050                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2051                const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2052
2053                TEST_ASSERT(numFields == 5);
2054                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2055                TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2056                TEST_ASSERT_UTEXT(str_second, fields[2]);
2057                TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2058                TEST_ASSERT_UTEXT(str_third, fields[4]);
2059                TEST_ASSERT(fields[5] == NULL);
2060            }
2061            for(i = 0; i < numFields; i++) {
2062                utext_close(fields[i]);
2063            }
2064        }
2065
2066        /*  Split with too few output strings available (2) */
2067        status = U_ZERO_ERROR;
2068        fields[0] = NULL;
2069        fields[1] = NULL;
2070        fields[2] = &patternText;
2071        numFields = uregex_splitUText(re, fields, 2, &status);
2072        TEST_ASSERT_SUCCESS(status);
2073
2074        /* The TEST_ASSERT_SUCCESS call above should change too... */
2075        if(U_SUCCESS(status)) {
2076            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2077            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2078            TEST_ASSERT(numFields == 2);
2079            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2080            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2081            TEST_ASSERT(fields[2] == &patternText);
2082        }
2083        for(i = 0; i < numFields; i++) {
2084            utext_close(fields[i]);
2085        }
2086
2087
2088        /*  Split with too few output strings available (3) */
2089        status = U_ZERO_ERROR;
2090        fields[0] = NULL;
2091        fields[1] = NULL;
2092        fields[2] = NULL;
2093        fields[3] = &patternText;
2094        numFields = uregex_splitUText(re, fields, 3, &status);
2095        TEST_ASSERT_SUCCESS(status);
2096
2097        /* The TEST_ASSERT_SUCCESS call above should change too... */
2098        if(U_SUCCESS(status)) {
2099            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2100            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2101            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2102            TEST_ASSERT(numFields == 3);
2103            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2104            TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2105            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2106            TEST_ASSERT(fields[3] == &patternText);
2107        }
2108        for(i = 0; i < numFields; i++) {
2109            utext_close(fields[i]);
2110        }
2111
2112        /*  Split with just enough output strings available (5) */
2113        status = U_ZERO_ERROR;
2114        fields[0] = NULL;
2115        fields[1] = NULL;
2116        fields[2] = NULL;
2117        fields[3] = NULL;
2118        fields[4] = NULL;
2119        fields[5] = &patternText;
2120        numFields = uregex_splitUText(re, fields, 5, &status);
2121        TEST_ASSERT_SUCCESS(status);
2122
2123        /* The TEST_ASSERT_SUCCESS call above should change too... */
2124        if(U_SUCCESS(status)) {
2125            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2126            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2127            const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2128            const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2129            const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2130
2131            TEST_ASSERT(numFields == 5);
2132            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2133            TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2134            TEST_ASSERT_UTEXT(str_second, fields[2]);
2135            TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2136            TEST_ASSERT_UTEXT(str_third, fields[4]);
2137            TEST_ASSERT(fields[5] == &patternText);
2138        }
2139        for(i = 0; i < numFields; i++) {
2140            utext_close(fields[i]);
2141        }
2142
2143        /* Split, end of text is a field delimiter.   */
2144        status = U_ZERO_ERROR;
2145        uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2146        TEST_ASSERT_SUCCESS(status);
2147
2148        /* The TEST_ASSERT_SUCCESS call above should change too... */
2149        if(U_SUCCESS(status)) {
2150            memset(fields, 0, sizeof(fields));
2151            fields[9] = &patternText;
2152            numFields = uregex_splitUText(re, fields, 9, &status);
2153            TEST_ASSERT_SUCCESS(status);
2154
2155            /* The TEST_ASSERT_SUCCESS call above should change too... */
2156            if(U_SUCCESS(status)) {
2157                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2158                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2159                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2160                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2161                const char str_empty[] = { 0x00 };
2162
2163                TEST_ASSERT(numFields == 5);
2164                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2165                TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2166                TEST_ASSERT_UTEXT(str_second, fields[2]);
2167                TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2168                TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2169                TEST_ASSERT(fields[5] == NULL);
2170                TEST_ASSERT(fields[8] == NULL);
2171                TEST_ASSERT(fields[9] == &patternText);
2172            }
2173            for(i = 0; i < numFields; i++) {
2174                utext_close(fields[i]);
2175            }
2176        }
2177
2178        uregex_close(re);
2179    }
2180    utext_close(&patternText);
2181}
2182
2183
2184static void TestRefreshInput(void) {
2185    /*
2186     *  RefreshInput changes out the input of a URegularExpression without
2187     *    changing anything else in the match state.  Used with Java JNI,
2188     *    when Java moves the underlying string storage.   This test
2189     *    runs a find() loop, moving the text after the first match.
2190     *    The right number of matches should still be found.
2191     */
2192    UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2193    UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2194    UErrorCode status = U_ZERO_ERROR;
2195    URegularExpression *re;
2196    UText ut1 = UTEXT_INITIALIZER;
2197    UText ut2 = UTEXT_INITIALIZER;
2198
2199    re = uregex_openC("[ABC]", 0, 0, &status);
2200    TEST_ASSERT_SUCCESS(status);
2201
2202    utext_openUChars(&ut1, testStr, -1, &status);
2203    TEST_ASSERT_SUCCESS(status);
2204    uregex_setUText(re, &ut1, &status);
2205    TEST_ASSERT_SUCCESS(status);
2206
2207    /* Find the first match "A" in the original string */
2208    TEST_ASSERT(uregex_findNext(re, &status));
2209    TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2210
2211    /* Move the string, kill the original string.  */
2212    u_strcpy(movedStr, testStr);
2213    u_memset(testStr, 0, u_strlen(testStr));
2214    utext_openUChars(&ut2, movedStr, -1, &status);
2215    TEST_ASSERT_SUCCESS(status);
2216    uregex_refreshUText(re, &ut2, &status);
2217    TEST_ASSERT_SUCCESS(status);
2218
2219    /* Find the following two matches, now working in the moved string. */
2220    TEST_ASSERT(uregex_findNext(re, &status));
2221    TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2222    TEST_ASSERT(uregex_findNext(re, &status));
2223    TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2224    TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2225
2226    uregex_close(re);
2227}
2228
2229
2230static void TestBug8421(void) {
2231    /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2232     *             was failing.
2233     */
2234    URegularExpression *re;
2235    UErrorCode status = U_ZERO_ERROR;
2236    int32_t  limit = -1;
2237
2238    re = uregex_openC("abc", 0, 0, &status);
2239    TEST_ASSERT_SUCCESS(status);
2240
2241    limit = uregex_getTimeLimit(re, &status);
2242    TEST_ASSERT_SUCCESS(status);
2243    TEST_ASSERT(limit == 0);
2244
2245    uregex_setTimeLimit(re, 100, &status);
2246    TEST_ASSERT_SUCCESS(status);
2247    limit = uregex_getTimeLimit(re, &status);
2248    TEST_ASSERT_SUCCESS(status);
2249    TEST_ASSERT(limit == 100);
2250
2251    uregex_close(re);
2252}
2253
2254static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2255    return FALSE;
2256}
2257
2258static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2259    return FALSE;
2260}
2261
2262static void TestBug10815() {
2263  /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2264   *              when the callback function specified by uregex_setMatchCallback() returns FALSE
2265   */
2266    URegularExpression *re;
2267    UErrorCode status = U_ZERO_ERROR;
2268    UChar    text[100];
2269
2270
2271    // findNext() with a find progress callback function.
2272
2273    re = uregex_openC(".z", 0, 0, &status);
2274    TEST_ASSERT_SUCCESS(status);
2275
2276    u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2277    uregex_setText(re, text, -1, &status);
2278    TEST_ASSERT_SUCCESS(status);
2279
2280    uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2281    TEST_ASSERT_SUCCESS(status);
2282
2283    uregex_findNext(re, &status);
2284    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2285
2286    uregex_close(re);
2287
2288    // findNext() with a match progress callback function.
2289
2290    status = U_ZERO_ERROR;
2291    re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2292    TEST_ASSERT_SUCCESS(status);
2293
2294    // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2295    // it will appear to be stuck in a (near) infinite loop.
2296    u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2297    uregex_setText(re, text, -1, &status);
2298    TEST_ASSERT_SUCCESS(status);
2299
2300    uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2301    TEST_ASSERT_SUCCESS(status);
2302
2303    uregex_findNext(re, &status);
2304    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2305
2306    uregex_close(re);
2307}
2308
2309
2310#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2311