1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2004-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File reapits.c
9*
10*********************************************************************************/
11/*C API TEST FOR Regular Expressions */
12/**
13*   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
14*   try to test the full functionality.  It just calls each function and verifies that it
15*   works on a basic level.
16*
17*   More complete testing of regular expression functionality is done with the C++ tests.
18**/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_REGULAR_EXPRESSIONS
23
24#include <stdlib.h>
25#include <string.h>
26#include "unicode/uloc.h"
27#include "unicode/uregex.h"
28#include "unicode/ustring.h"
29#include "unicode/utext.h"
30#include "cintltst.h"
31#include "cmemory.h"
32
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Logs a data error, tagged with the current file and line, when 'status'
 *     holds an ICU failure code.  Expands to a brace-enclosed block.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
35
/*
 * TEST_ASSERT(expr)
 *     Logs a test failure, tagged with the current file and line and the text
 *     of the expression, when 'expr' compares equal to FALSE.
 *     Expands to a brace-enclosed block, so a trailing semicolon is optional.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
38
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         Macros to handle the boilerplate around setting up regex test cases.
 *         They must be used as a pair inside one function: TEST_SETUP opens a
 *         block and an inner "if" that TEST_TEARDOWN closes.  The enclosing
 *         function must provide the variables 're' and 'status', which these
 *         macros assign to.
 *
 *         Parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         're' is the compiled, ready-to-go regular expression.
 *
 *         The test code is skipped (and a failure is logged) when compiling the
 *         pattern, allocating the UChar copy of testString, or setting the text
 *         fails.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        /* Out of memory: report it through status, keeping any earlier failure. */ \
        if (U_SUCCESS(status)) { status = U_MEMORY_ALLOCATION_ERROR; } \
    } else { \
        u_uastrncpy(srcString, testString,  (int32_t)(strlen(testString)+1)); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {
60
/*
 * TEST_TEARDOWN
 *     Closes the "if" and the block opened by TEST_SETUP.  In between it
 *     re-checks 'status', so a failure left behind by the test code is logged,
 *     then closes 're' and frees the 'srcString' buffer declared by TEST_SETUP.
 */
#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
    }
67
68
69/**
70 * @param expected utf-8 array of bytes to be expected
71 */
72static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
73     char     buf_inside_macro[120];
74     int32_t  len = (int32_t)strlen(expected);
75     UBool    success;
76     if (nulTerm) {
77         u_austrncpy(buf_inside_macro, (actual), len+1);
78         buf_inside_macro[len+2] = 0;
79         success = (strcmp((expected), buf_inside_macro) == 0);
80     } else {
81         u_austrncpy(buf_inside_macro, (actual), len);
82         buf_inside_macro[len+1] = 0;
83         success = (strncmp((expected), buf_inside_macro, len) == 0);
84     }
85     if (success == FALSE) {
86         log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
87             file, line, (expected), buf_inside_macro);
88     }
89}
90
/*
 * TEST_ASSERT_STRING(expected, actual, nulTerm)
 *     Calls test_assert_string() with the file and line of the call site.
 */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) \
    test_assert_string((expected), (actual), (nulTerm), __FILE__, __LINE__)
92
93
94static UBool equals_utf8_utext(const char *utf8, UText *utext) {
95    int32_t u8i = 0;
96    UChar32 u8c = 0;
97    UChar32 utc = 0;
98    UBool   stringsEqual = TRUE;
99    utext_setNativeIndex(utext, 0);
100    for (;;) {
101        U8_NEXT_UNSAFE(utf8, u8i, u8c);
102        utc = utext_next32(utext);
103        if (u8c == 0 && utc == U_SENTINEL) {
104            break;
105        }
106        if (u8c != utc || u8c == 0) {
107            stringsEqual = FALSE;
108            break;
109        }
110    }
111    return stringsEqual;
112}
113
114
115static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
116    utext_setNativeIndex(actual, 0);
117    if (!equals_utf8_utext(expected, actual)) {
118        UChar32 c;
119        log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
120        c = utext_next32From(actual, 0);
121        while (c != U_SENTINEL) {
122            if (0x20<c && c <0x7e) {
123                log_err("%c", c);
124            } else {
125                log_err("%#x", c);
126            }
127            c = UTEXT_NEXT32(actual);
128        }
129        log_err("\"\n");
130    }
131}
132
/*
 * TEST_ASSERT_UTEXT(const char *expected, UText *actual)
 *     Calls test_assert_utext() with the file and line of the call site.
 *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 */
#define TEST_ASSERT_UTEXT(expected, actual) \
    test_assert_utext((expected), (actual), __FILE__, __LINE__)
138
139static UBool testUTextEqual(UText *uta, UText *utb) {
140    UChar32 ca = 0;
141    UChar32 cb = 0;
142    utext_setNativeIndex(uta, 0);
143    utext_setNativeIndex(utb, 0);
144    do {
145        ca = utext_next32(uta);
146        cb = utext_next32(utb);
147        if (ca != cb) {
148            break;
149        }
150    } while (ca != U_SENTINEL);
151    return ca == cb;
152}
153
154
155
156
157static void TestRegexCAPI(void);
158static void TestBug4315(void);
159static void TestUTextAPI(void);
160static void TestRefreshInput(void);
161static void TestBug8421(void);
162static void TestBug10815(void);
163
164void addURegexTest(TestNode** root);
165
166void addURegexTest(TestNode** root)
167{
168    addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
169    addTest(root, &TestBug4315,   "regex/TestBug4315");
170    addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
171    addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
172    addTest(root, &TestBug8421,   "regex/TestBug8421");
173    addTest(root, &TestBug10815,   "regex/TestBug10815");
174}
175
/*
 * Callback function and context struct used for testing
 *    regular expression user callbacks.  This test is mostly the same as
 *    the corresponding C++ test in intltest.
 */
struct callBackContext {
    int32_t maxCalls;    /* TestCallbackFn returns FALSE once numCalls reaches this */
    int32_t numCalls;    /* incremented by TestCallbackFn on every call             */
    int32_t lastSteps;   /* 'steps' value seen by the latest TestCallbackFn call    */
};
typedef struct callBackContext callBackContext;
186
187static UBool U_EXPORT2 U_CALLCONV
188TestCallbackFn(const void *context, int32_t steps) {
189  callBackContext  *info = (callBackContext *)context;
190  if (info->lastSteps+1 != steps) {
191      log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
192  }
193  info->lastSteps = steps;
194  info->numCalls++;
195  return (info->numCalls < info->maxCalls);
196}
197
198/*
199 *   Regular Expression C API Tests
200 */
201static void TestRegexCAPI(void) {
202    UErrorCode           status = U_ZERO_ERROR;
203    URegularExpression  *re;
204    UChar                pat[200];
205    UChar               *minus1;
206
207    memset(&minus1, -1, sizeof(minus1));
208
    /* Minimalist open/close */
210    u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
211    re = uregex_open(pat, -1, 0, 0, &status);
212    if (U_FAILURE(status)) {
213         log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
214         return;
215    }
216    uregex_close(re);
217
218    /* Open with all flag values set */
219    status = U_ZERO_ERROR;
220    re = uregex_open(pat, -1,
221        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
222        0, &status);
223    TEST_ASSERT_SUCCESS(status);
224    uregex_close(re);
225
226    /* Open with an invalid flag */
227    status = U_ZERO_ERROR;
228    re = uregex_open(pat, -1, 0x40000000, 0, &status);
229    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
230    uregex_close(re);
231
232    /* Open with an unimplemented flag */
233    status = U_ZERO_ERROR;
234    re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
235    TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
236    uregex_close(re);
237
238    /* openC with an invalid parameter */
239    status = U_ZERO_ERROR;
240    re = uregex_openC(NULL,
241        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
242    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
243
244    /* openC with an invalid parameter */
245    status = U_USELESS_COLLATOR_ERROR;
246    re = uregex_openC(NULL,
247        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
248    TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
249
250    /* openC   open from a C string */
251    {
252        const UChar   *p;
253        int32_t  len;
254        status = U_ZERO_ERROR;
255        re = uregex_openC("abc*", 0, 0, &status);
256        TEST_ASSERT_SUCCESS(status);
257        p = uregex_pattern(re, &len, &status);
258        TEST_ASSERT_SUCCESS(status);
259
260        /* The TEST_ASSERT_SUCCESS above should change too... */
261        if(U_SUCCESS(status)) {
262            u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
263            TEST_ASSERT(u_strcmp(pat, p) == 0);
264            TEST_ASSERT(len==(int32_t)strlen("abc*"));
265        }
266
267        uregex_close(re);
268
269        /*  TODO:  Open with ParseError parameter */
270    }
271
272    /*
273     *  clone
274     */
275    {
276        URegularExpression *clone1;
277        URegularExpression *clone2;
278        URegularExpression *clone3;
279        UChar  testString1[30];
280        UChar  testString2[30];
281        UBool  result;
282
283
284        status = U_ZERO_ERROR;
285        re = uregex_openC("abc*", 0, 0, &status);
286        TEST_ASSERT_SUCCESS(status);
287        clone1 = uregex_clone(re, &status);
288        TEST_ASSERT_SUCCESS(status);
289        TEST_ASSERT(clone1 != NULL);
290
291        status = U_ZERO_ERROR;
292        clone2 = uregex_clone(re, &status);
293        TEST_ASSERT_SUCCESS(status);
294        TEST_ASSERT(clone2 != NULL);
295        uregex_close(re);
296
297        status = U_ZERO_ERROR;
298        clone3 = uregex_clone(clone2, &status);
299        TEST_ASSERT_SUCCESS(status);
300        TEST_ASSERT(clone3 != NULL);
301
302        u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
303        u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
304
305        status = U_ZERO_ERROR;
306        uregex_setText(clone1, testString1, -1, &status);
307        TEST_ASSERT_SUCCESS(status);
308        result = uregex_lookingAt(clone1, 0, &status);
309        TEST_ASSERT_SUCCESS(status);
310        TEST_ASSERT(result==TRUE);
311
312        status = U_ZERO_ERROR;
313        uregex_setText(clone2, testString2, -1, &status);
314        TEST_ASSERT_SUCCESS(status);
315        result = uregex_lookingAt(clone2, 0, &status);
316        TEST_ASSERT_SUCCESS(status);
317        TEST_ASSERT(result==FALSE);
318        result = uregex_find(clone2, 0, &status);
319        TEST_ASSERT_SUCCESS(status);
320        TEST_ASSERT(result==TRUE);
321
322        uregex_close(clone1);
323        uregex_close(clone2);
324        uregex_close(clone3);
325
326    }
327
328    /*
329     *  pattern()
330    */
331    {
332        const UChar  *resultPat;
333        int32_t       resultLen;
334        u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
335        status = U_ZERO_ERROR;
336        re = uregex_open(pat, -1, 0, NULL, &status);
337        resultPat = uregex_pattern(re, &resultLen, &status);
338        TEST_ASSERT_SUCCESS(status);
339
340        /* The TEST_ASSERT_SUCCESS above should change too... */
341        if (U_SUCCESS(status)) {
342            TEST_ASSERT(resultLen == -1);
343            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
344        }
345
346        uregex_close(re);
347
348        status = U_ZERO_ERROR;
349        re = uregex_open(pat, 3, 0, NULL, &status);
350        resultPat = uregex_pattern(re, &resultLen, &status);
351        TEST_ASSERT_SUCCESS(status);
352        TEST_ASSERT_SUCCESS(status);
353
354        /* The TEST_ASSERT_SUCCESS above should change too... */
355        if (U_SUCCESS(status)) {
356            TEST_ASSERT(resultLen == 3);
357            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
358            TEST_ASSERT(u_strlen(resultPat) == 3);
359        }
360
361        uregex_close(re);
362    }
363
364    /*
365     *  flags()
366     */
367    {
368        int32_t  t;
369
370        status = U_ZERO_ERROR;
371        re = uregex_open(pat, -1, 0, NULL, &status);
372        t  = uregex_flags(re, &status);
373        TEST_ASSERT_SUCCESS(status);
374        TEST_ASSERT(t == 0);
375        uregex_close(re);
376
377        status = U_ZERO_ERROR;
378        re = uregex_open(pat, -1, 0, NULL, &status);
379        t  = uregex_flags(re, &status);
380        TEST_ASSERT_SUCCESS(status);
381        TEST_ASSERT(t == 0);
382        uregex_close(re);
383
384        status = U_ZERO_ERROR;
385        re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
386        t  = uregex_flags(re, &status);
387        TEST_ASSERT_SUCCESS(status);
388        TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
389        uregex_close(re);
390    }
391
392    /*
393     *  setText() and lookingAt()
394     */
395    {
396        UChar  text1[50];
397        UChar  text2[50];
398        UBool  result;
399
400        u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
401        u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
402        status = U_ZERO_ERROR;
403        u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
404        re = uregex_open(pat, -1, 0, NULL, &status);
405        TEST_ASSERT_SUCCESS(status);
406
407        /* Operation before doing a setText should fail... */
408        status = U_ZERO_ERROR;
409        uregex_lookingAt(re, 0, &status);
410        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
411
412        status = U_ZERO_ERROR;
413        uregex_setText(re, text1, -1, &status);
414        result = uregex_lookingAt(re, 0, &status);
415        TEST_ASSERT(result == TRUE);
416        TEST_ASSERT_SUCCESS(status);
417
418        status = U_ZERO_ERROR;
419        uregex_setText(re, text2, -1, &status);
420        result = uregex_lookingAt(re, 0, &status);
421        TEST_ASSERT(result == FALSE);
422        TEST_ASSERT_SUCCESS(status);
423
424        status = U_ZERO_ERROR;
425        uregex_setText(re, text1, -1, &status);
426        result = uregex_lookingAt(re, 0, &status);
427        TEST_ASSERT(result == TRUE);
428        TEST_ASSERT_SUCCESS(status);
429
430        status = U_ZERO_ERROR;
431        uregex_setText(re, text1, 5, &status);
432        result = uregex_lookingAt(re, 0, &status);
433        TEST_ASSERT(result == FALSE);
434        TEST_ASSERT_SUCCESS(status);
435
436        status = U_ZERO_ERROR;
437        uregex_setText(re, text1, 6, &status);
438        result = uregex_lookingAt(re, 0, &status);
439        TEST_ASSERT(result == TRUE);
440        TEST_ASSERT_SUCCESS(status);
441
442        uregex_close(re);
443    }
444
445
446    /*
447     *  getText()
448     */
449    {
450        UChar    text1[50];
451        UChar    text2[50];
452        const UChar   *result;
453        int32_t  textLength;
454
455        u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
456        u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
457        status = U_ZERO_ERROR;
458        u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
459        re = uregex_open(pat, -1, 0, NULL, &status);
460
461        uregex_setText(re, text1, -1, &status);
462        result = uregex_getText(re, &textLength, &status);
463        TEST_ASSERT(result == text1);
464        TEST_ASSERT(textLength == -1);
465        TEST_ASSERT_SUCCESS(status);
466
467        status = U_ZERO_ERROR;
468        uregex_setText(re, text2, 7, &status);
469        result = uregex_getText(re, &textLength, &status);
470        TEST_ASSERT(result == text2);
471        TEST_ASSERT(textLength == 7);
472        TEST_ASSERT_SUCCESS(status);
473
474        status = U_ZERO_ERROR;
475        uregex_setText(re, text2, 4, &status);
476        result = uregex_getText(re, &textLength, &status);
477        TEST_ASSERT(result == text2);
478        TEST_ASSERT(textLength == 4);
479        TEST_ASSERT_SUCCESS(status);
480        uregex_close(re);
481    }
482
483    /*
484     *  matches()
485     */
486    {
487        UChar   text1[50];
488        UBool   result;
489        int     len;
490        UChar   nullString[] = {0,0,0};
491
492        u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
493        status = U_ZERO_ERROR;
494        u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
495        re = uregex_open(pat, -1, 0, NULL, &status);
496
497        uregex_setText(re, text1, -1, &status);
498        result = uregex_matches(re, 0, &status);
499        TEST_ASSERT(result == FALSE);
500        TEST_ASSERT_SUCCESS(status);
501
502        status = U_ZERO_ERROR;
503        uregex_setText(re, text1, 6, &status);
504        result = uregex_matches(re, 0, &status);
505        TEST_ASSERT(result == TRUE);
506        TEST_ASSERT_SUCCESS(status);
507
508        status = U_ZERO_ERROR;
509        uregex_setText(re, text1, 6, &status);
510        result = uregex_matches(re, 1, &status);
511        TEST_ASSERT(result == FALSE);
512        TEST_ASSERT_SUCCESS(status);
513        uregex_close(re);
514
515        status = U_ZERO_ERROR;
516        re = uregex_openC(".?", 0, NULL, &status);
517        uregex_setText(re, text1, -1, &status);
518        len = u_strlen(text1);
519        result = uregex_matches(re, len, &status);
520        TEST_ASSERT(result == TRUE);
521        TEST_ASSERT_SUCCESS(status);
522
523        status = U_ZERO_ERROR;
524        uregex_setText(re, nullString, -1, &status);
525        TEST_ASSERT_SUCCESS(status);
526        result = uregex_matches(re, 0, &status);
527        TEST_ASSERT(result == TRUE);
528        TEST_ASSERT_SUCCESS(status);
529        uregex_close(re);
530    }
531
532
533    /*
534     *  lookingAt()    Used in setText test.
535     */
536
537
538    /*
539     *  find(), findNext, start, end, reset
540     */
541    {
542        UChar    text1[50];
543        UBool    result;
544        u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
545        status = U_ZERO_ERROR;
546        re = uregex_openC("rx", 0, NULL, &status);
547
548        uregex_setText(re, text1, -1, &status);
549        result = uregex_find(re, 0, &status);
550        TEST_ASSERT(result == TRUE);
551        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
552        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
553        TEST_ASSERT_SUCCESS(status);
554
555        result = uregex_find(re, 9, &status);
556        TEST_ASSERT(result == TRUE);
557        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
558        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
559        TEST_ASSERT_SUCCESS(status);
560
561        result = uregex_find(re, 14, &status);
562        TEST_ASSERT(result == FALSE);
563        TEST_ASSERT_SUCCESS(status);
564
565        status = U_ZERO_ERROR;
566        uregex_reset(re, 0, &status);
567
568        result = uregex_findNext(re, &status);
569        TEST_ASSERT(result == TRUE);
570        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
571        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
572        TEST_ASSERT_SUCCESS(status);
573
574        result = uregex_findNext(re, &status);
575        TEST_ASSERT(result == TRUE);
576        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
577        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
578        TEST_ASSERT_SUCCESS(status);
579
580        status = U_ZERO_ERROR;
581        uregex_reset(re, 12, &status);
582
583        result = uregex_findNext(re, &status);
584        TEST_ASSERT(result == TRUE);
585        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
586        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
587        TEST_ASSERT_SUCCESS(status);
588
589        result = uregex_findNext(re, &status);
590        TEST_ASSERT(result == FALSE);
591        TEST_ASSERT_SUCCESS(status);
592
593        uregex_close(re);
594    }
595
596    /*
597     *  groupCount
598     */
599    {
600        int32_t result;
601
602        status = U_ZERO_ERROR;
603        re = uregex_openC("abc", 0, NULL, &status);
604        result = uregex_groupCount(re, &status);
605        TEST_ASSERT_SUCCESS(status);
606        TEST_ASSERT(result == 0);
607        uregex_close(re);
608
609        status = U_ZERO_ERROR;
610        re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
611        result = uregex_groupCount(re, &status);
612        TEST_ASSERT_SUCCESS(status);
613        TEST_ASSERT(result == 3);
614        uregex_close(re);
615
616    }
617
618
619    /*
620     *  group()
621     */
622    {
623        UChar    text1[80];
624        UChar    buf[80];
625        UBool    result;
626        int32_t  resultSz;
627        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
628
629        status = U_ZERO_ERROR;
630        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
631        TEST_ASSERT_SUCCESS(status);
632
633
634        uregex_setText(re, text1, -1, &status);
635        result = uregex_find(re, 0, &status);
636        TEST_ASSERT(result==TRUE);
637
638        /*  Capture Group 0, the full match.  Should succeed.  */
639        status = U_ZERO_ERROR;
640        resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
641        TEST_ASSERT_SUCCESS(status);
642        TEST_ASSERT_STRING("abc interior def", buf, TRUE);
643        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
644
645        /*  Capture group #1.  Should succeed. */
646        status = U_ZERO_ERROR;
647        resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
648        TEST_ASSERT_SUCCESS(status);
649        TEST_ASSERT_STRING(" interior ", buf, TRUE);
650        TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
651
652        /*  Capture group out of range.  Error. */
653        status = U_ZERO_ERROR;
654        uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
655        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
656
657        /* NULL buffer, pure pre-flight */
658        status = U_ZERO_ERROR;
659        resultSz = uregex_group(re, 0, NULL, 0, &status);
660        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
661        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
662
663        /* Too small buffer, truncated string */
664        status = U_ZERO_ERROR;
665        memset(buf, -1, sizeof(buf));
666        resultSz = uregex_group(re, 0, buf, 5, &status);
667        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
668        TEST_ASSERT_STRING("abc i", buf, FALSE);
669        TEST_ASSERT(buf[5] == (UChar)0xffff);
670        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
671
672        /* Output string just fits buffer, no NUL term. */
673        status = U_ZERO_ERROR;
674        resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
675        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
676        TEST_ASSERT_STRING("abc interior def", buf, FALSE);
677        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
678        TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
679
680        uregex_close(re);
681
682    }
683
684    /*
685     *  Regions
686     */
687
688
689        /* SetRegion(), getRegion() do something  */
690        TEST_SETUP(".*", "0123456789ABCDEF", 0)
691        UChar resultString[40];
692        TEST_ASSERT(uregex_regionStart(re, &status) == 0);
693        TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
694        uregex_setRegion(re, 3, 6, &status);
695        TEST_ASSERT(uregex_regionStart(re, &status) == 3);
696        TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
697        TEST_ASSERT(uregex_findNext(re, &status));
698        TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
699        TEST_ASSERT_STRING("345", resultString, TRUE);
700        TEST_TEARDOWN;
701
702        /* find(start=-1) uses regions   */
703        TEST_SETUP(".*", "0123456789ABCDEF", 0);
704        uregex_setRegion(re, 4, 6, &status);
705        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
706        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
707        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
708        TEST_TEARDOWN;
709
710        /* find (start >=0) does not use regions   */
711        TEST_SETUP(".*", "0123456789ABCDEF", 0);
712        uregex_setRegion(re, 4, 6, &status);
713        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
714        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
715        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
716        TEST_TEARDOWN;
717
718        /* findNext() obeys regions    */
719        TEST_SETUP(".", "0123456789ABCDEF", 0);
720        uregex_setRegion(re, 4, 6, &status);
721        TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
722        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
723        TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
724        TEST_ASSERT(uregex_start(re, 0, &status) == 5);
725        TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
726        TEST_TEARDOWN;
727
728        /* matches(start=-1) uses regions                                           */
729        /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
730        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
731        uregex_setRegion(re, 4, 6, &status);
732        TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
733        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
734        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
735        TEST_TEARDOWN;
736
737        /* matches (start >=0) does not use regions       */
738        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
739        uregex_setRegion(re, 4, 6, &status);
740        TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
741        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
742        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
743        TEST_TEARDOWN;
744
745        /* lookingAt(start=-1) uses regions                                         */
746        /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
747        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
748        uregex_setRegion(re, 4, 6, &status);
749        TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
750        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
751        TEST_ASSERT(uregex_end(re, 0, &status) == 4);
752        TEST_TEARDOWN;
753
754        /* lookingAt (start >=0) does not use regions  */
755        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
756        uregex_setRegion(re, 4, 6, &status);
757        TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
758        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
759        TEST_ASSERT(uregex_end(re, 0, &status) == 0);
760        TEST_TEARDOWN;
761
762        /* hitEnd()       */
763        TEST_SETUP("[a-f]*", "abcdefghij", 0);
764        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
765        TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
766        TEST_TEARDOWN;
767
768        TEST_SETUP("[a-f]*", "abcdef", 0);
769        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
770        TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
771        TEST_TEARDOWN;
772
773        /* requireEnd   */
774        TEST_SETUP("abcd", "abcd", 0);
775        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
776        TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
777        TEST_TEARDOWN;
778
779        TEST_SETUP("abcd$", "abcd", 0);
780        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
781        TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
782        TEST_TEARDOWN;
783
784        /* anchoringBounds        */
785        TEST_SETUP("abc$", "abcdef", 0);
786        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
787        uregex_useAnchoringBounds(re, FALSE, &status);
788        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
789
790        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
791        uregex_useAnchoringBounds(re, TRUE, &status);
792        uregex_setRegion(re, 0, 3, &status);
793        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
794        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
795        TEST_TEARDOWN;
796
797        /* Transparent Bounds      */
798        TEST_SETUP("abc(?=def)", "abcdef", 0);
799        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
800        uregex_useTransparentBounds(re, TRUE, &status);
801        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
802
803        uregex_useTransparentBounds(re, FALSE, &status);
804        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
805        uregex_setRegion(re, 0, 3, &status);
806        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
807        uregex_useTransparentBounds(re, TRUE, &status);
808        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
809        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
810        TEST_TEARDOWN;
811
812
813    /*
814     *  replaceFirst()
815     */
816    {
817        UChar    text1[80];
818        UChar    text2[80];
819        UChar    replText[80];
820        UChar    buf[80];
821        int32_t  resultSz;
822        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
823        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
824        u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
825
826        status = U_ZERO_ERROR;
827        re = uregex_openC("x(.*?)x", 0, NULL, &status);
828        TEST_ASSERT_SUCCESS(status);
829
830        /*  Normal case, with match */
831        uregex_setText(re, text1, -1, &status);
832        resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
833        TEST_ASSERT_SUCCESS(status);
834        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
835        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
836
837        /* No match.  Text should copy to output with no changes.  */
838        status = U_ZERO_ERROR;
839        uregex_setText(re, text2, -1, &status);
840        resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
841        TEST_ASSERT_SUCCESS(status);
842        TEST_ASSERT_STRING("No match here.", buf, TRUE);
843        TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
844
845        /*  Match, output just fills buffer, no termination warning. */
846        status = U_ZERO_ERROR;
847        uregex_setText(re, text1, -1, &status);
848        memset(buf, -1, sizeof(buf));
849        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
850        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
851        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
852        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
853        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
854
855        /* Do the replaceFirst again, without first resetting anything.
856         *  Should give the same results.
857         */
858        status = U_ZERO_ERROR;
859        memset(buf, -1, sizeof(buf));
860        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
861        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
862        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
863        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
864        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
865
866        /* NULL buffer, zero buffer length */
867        status = U_ZERO_ERROR;
868        resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
869        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
870        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
871
872        /* Buffer too small by one */
873        status = U_ZERO_ERROR;
874        memset(buf, -1, sizeof(buf));
875        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
876        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
877        TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
878        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
879        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
880
881        uregex_close(re);
882    }
883
884
885    /*
886     *  replaceAll()
887     */
888    {
889        UChar    text1[80];          /*  "Replace xaax x1x x...x." */
890        UChar    text2[80];          /*  "No match Here"           */
891        UChar    replText[80];       /*  "<$1>"                    */
892        UChar    replText2[80];      /*  "<<$1>>"                  */
893        const char * pattern = "x(.*?)x";
894        const char * expectedResult = "Replace <aa> <1> <...>.";
895        const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
896        UChar    buf[80];
897        int32_t  resultSize;
898        int32_t  expectedResultSize;
899        int32_t  expectedResultSize2;
900        int32_t  i;
901
902        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
903        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
904        u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
905        u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
906        expectedResultSize = strlen(expectedResult);
907        expectedResultSize2 = strlen(expectedResult2);
908
909        status = U_ZERO_ERROR;
910        re = uregex_openC(pattern, 0, NULL, &status);
911        TEST_ASSERT_SUCCESS(status);
912
913        /*  Normal case, with match */
914        uregex_setText(re, text1, -1, &status);
915        resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
916        TEST_ASSERT_SUCCESS(status);
917        TEST_ASSERT_STRING(expectedResult, buf, TRUE);
918        TEST_ASSERT(resultSize == expectedResultSize);
919
920        /* No match.  Text should copy to output with no changes.  */
921        status = U_ZERO_ERROR;
922        uregex_setText(re, text2, -1, &status);
923        resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
924        TEST_ASSERT_SUCCESS(status);
925        TEST_ASSERT_STRING("No match here.", buf, TRUE);
926        TEST_ASSERT(resultSize == u_strlen(text2));
927
        /*  Match, output exactly fills the buffer with no room for a NUL; expect the not-terminated warning. */
929        status = U_ZERO_ERROR;
930        uregex_setText(re, text1, -1, &status);
931        memset(buf, -1, sizeof(buf));
932        resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
933        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
934        TEST_ASSERT_STRING(expectedResult, buf, FALSE);
935        TEST_ASSERT(resultSize == expectedResultSize);
936        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
937
        /* Do the replaceAll again, without first resetting anything.
         *  Should give the same results.
         */
941        status = U_ZERO_ERROR;
942        memset(buf, -1, sizeof(buf));
943        resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
944        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
945        TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
946        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
947        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
948
949        /* NULL buffer, zero buffer length */
950        status = U_ZERO_ERROR;
951        resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
952        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
953        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
954
955        /* Buffer too small.  Try every size, which will tickle edge cases
956         * in uregex_appendReplacement (used by replaceAll)   */
957        for (i=0; i<expectedResultSize; i++) {
958            char  expected[80];
959            status = U_ZERO_ERROR;
960            memset(buf, -1, sizeof(buf));
961            resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
962            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
963            strcpy(expected, expectedResult);
964            expected[i] = 0;
965            TEST_ASSERT_STRING(expected, buf, FALSE);
966            TEST_ASSERT(resultSize == expectedResultSize);
967            TEST_ASSERT(buf[i] == (UChar)0xffff);
968        }
969
970        /* Buffer too small.  Same as previous test, except this time the replacement
971         * text is longer than the match capture group, making the length of the complete
972         * replacement longer than the original string.
973         */
974        for (i=0; i<expectedResultSize2; i++) {
975            char  expected[80];
976            status = U_ZERO_ERROR;
977            memset(buf, -1, sizeof(buf));
978            resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
979            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
980            strcpy(expected, expectedResult2);
981            expected[i] = 0;
982            TEST_ASSERT_STRING(expected, buf, FALSE);
983            TEST_ASSERT(resultSize == expectedResultSize2);
984            TEST_ASSERT(buf[i] == (UChar)0xffff);
985        }
986
987
988        uregex_close(re);
989    }
990
991
992    /*
993     *  appendReplacement()
994     */
995    {
996        UChar    text[100];
997        UChar    repl[100];
998        UChar    buf[100];
999        UChar   *bufPtr;
1000        int32_t  bufCap;
1001
1002
1003        status = U_ZERO_ERROR;
1004        re = uregex_openC(".*", 0, 0, &status);
1005        TEST_ASSERT_SUCCESS(status);
1006
1007        u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1008        u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1009        uregex_setText(re, text, -1, &status);
1010
1011        /* match covers whole target string */
1012        uregex_find(re, 0, &status);
1013        TEST_ASSERT_SUCCESS(status);
1014        bufPtr = buf;
1015        bufCap = UPRV_LENGTHOF(buf);
1016        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1017        TEST_ASSERT_SUCCESS(status);
1018        TEST_ASSERT_STRING("some other", buf, TRUE);
1019
1020        /* Match has \u \U escapes */
1021        uregex_find(re, 0, &status);
1022        TEST_ASSERT_SUCCESS(status);
1023        bufPtr = buf;
1024        bufCap = UPRV_LENGTHOF(buf);
1025        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1026        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1027        TEST_ASSERT_SUCCESS(status);
1028        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1029
1030        /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1031        status = U_ZERO_ERROR;
1032        uregex_find(re, 0, &status);
1033        TEST_ASSERT_SUCCESS(status);
1034        bufPtr = buf;
1035        status = U_BUFFER_OVERFLOW_ERROR;
1036        uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1037        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1038
1039        uregex_close(re);
1040    }
1041
1042
1043    /*
1044     *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1045     */
1046
1047    /*
1048     *  split()
1049     */
1050    {
1051        UChar    textToSplit[80];
1052        UChar    text2[80];
1053        UChar    buf[200];
1054        UChar    *fields[10];
1055        int32_t  numFields;
1056        int32_t  requiredCapacity;
1057        int32_t  spaceNeeded;
1058        int32_t  sz;
1059
1060        u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1061        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1062
1063        status = U_ZERO_ERROR;
1064        re = uregex_openC(":", 0, NULL, &status);
1065
1066
1067        /*  Simple split */
1068
1069        uregex_setText(re, textToSplit, -1, &status);
1070        TEST_ASSERT_SUCCESS(status);
1071
1072        /* The TEST_ASSERT_SUCCESS call above should change too... */
1073        if (U_SUCCESS(status)) {
1074            memset(fields, -1, sizeof(fields));
1075            numFields =
1076                uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1077            TEST_ASSERT_SUCCESS(status);
1078
1079            /* The TEST_ASSERT_SUCCESS call above should change too... */
1080            if(U_SUCCESS(status)) {
1081                TEST_ASSERT(numFields == 3);
1082                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1083                TEST_ASSERT_STRING(" second", fields[1], TRUE);
1084                TEST_ASSERT_STRING("  third", fields[2], TRUE);
1085                TEST_ASSERT(fields[3] == NULL);
1086
1087                spaceNeeded = u_strlen(textToSplit) -
1088                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1089                            numFields;          /* Each field gets a NUL terminator */
1090
1091                TEST_ASSERT(spaceNeeded == requiredCapacity);
1092            }
1093        }
1094
1095        uregex_close(re);
1096
1097
1098        /*  Split with too few output strings available */
1099        status = U_ZERO_ERROR;
1100        re = uregex_openC(":", 0, NULL, &status);
1101        uregex_setText(re, textToSplit, -1, &status);
1102        TEST_ASSERT_SUCCESS(status);
1103
1104        /* The TEST_ASSERT_SUCCESS call above should change too... */
1105        if(U_SUCCESS(status)) {
1106            memset(fields, -1, sizeof(fields));
1107            numFields =
1108                uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1109            TEST_ASSERT_SUCCESS(status);
1110
1111            /* The TEST_ASSERT_SUCCESS call above should change too... */
1112            if(U_SUCCESS(status)) {
1113                TEST_ASSERT(numFields == 2);
1114                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1115                TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1116                TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1117
1118                spaceNeeded = u_strlen(textToSplit) -
1119                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1120                            numFields;          /* Each field gets a NUL terminator */
1121
1122                TEST_ASSERT(spaceNeeded == requiredCapacity);
1123
1124                /* Split with a range of output buffer sizes.  */
1125                spaceNeeded = u_strlen(textToSplit) -
1126                    (numFields - 1)  +  /* Field delimiters do not appear in output */
1127                    numFields;          /* Each field gets a NUL terminator */
1128
1129                for (sz=0; sz < spaceNeeded+1; sz++) {
1130                    memset(fields, -1, sizeof(fields));
1131                    status = U_ZERO_ERROR;
1132                    numFields =
1133                        uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1134                    if (sz >= spaceNeeded) {
1135                        TEST_ASSERT_SUCCESS(status);
1136                        TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1137                        TEST_ASSERT_STRING(" second", fields[1], TRUE);
1138                        TEST_ASSERT_STRING("  third", fields[2], TRUE);
1139                    } else {
1140                        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1141                    }
1142                    TEST_ASSERT(numFields == 3);
1143                    TEST_ASSERT(fields[3] == NULL);
1144                    TEST_ASSERT(spaceNeeded == requiredCapacity);
1145                }
1146            }
1147        }
1148
1149        uregex_close(re);
1150    }
1151
1152
1153
1154
1155    /* Split(), part 2.  Patterns with capture groups.  The capture group text
1156     *                   comes out as additional fields.  */
1157    {
1158        UChar    textToSplit[80];
1159        UChar    buf[200];
1160        UChar    *fields[10];
1161        int32_t  numFields;
1162        int32_t  requiredCapacity;
1163        int32_t  spaceNeeded;
1164        int32_t  sz;
1165
1166        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1167
1168        status = U_ZERO_ERROR;
1169        re = uregex_openC("<(.*?)>", 0, NULL, &status);
1170
1171        uregex_setText(re, textToSplit, -1, &status);
1172        TEST_ASSERT_SUCCESS(status);
1173
1174        /* The TEST_ASSERT_SUCCESS call above should change too... */
1175        if(U_SUCCESS(status)) {
1176            memset(fields, -1, sizeof(fields));
1177            numFields =
1178                uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1179            TEST_ASSERT_SUCCESS(status);
1180
1181            /* The TEST_ASSERT_SUCCESS call above should change too... */
1182            if(U_SUCCESS(status)) {
1183                TEST_ASSERT(numFields == 5);
1184                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1185                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1186                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1187                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1188                TEST_ASSERT_STRING("  third", fields[4], TRUE);
1189                TEST_ASSERT(fields[5] == NULL);
1190                spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1191                TEST_ASSERT(spaceNeeded == requiredCapacity);
1192            }
1193        }
1194
1195        /*  Split with too few output strings available (2) */
1196        status = U_ZERO_ERROR;
1197        memset(fields, -1, sizeof(fields));
1198        numFields =
1199            uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1200        TEST_ASSERT_SUCCESS(status);
1201
1202        /* The TEST_ASSERT_SUCCESS call above should change too... */
1203        if(U_SUCCESS(status)) {
1204            TEST_ASSERT(numFields == 2);
1205            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1206            TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1207            TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1208
1209            spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1210            TEST_ASSERT(spaceNeeded == requiredCapacity);
1211        }
1212
1213        /*  Split with too few output strings available (3) */
1214        status = U_ZERO_ERROR;
1215        memset(fields, -1, sizeof(fields));
1216        numFields =
1217            uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1218        TEST_ASSERT_SUCCESS(status);
1219
1220        /* The TEST_ASSERT_SUCCESS call above should change too... */
1221        if(U_SUCCESS(status)) {
1222            TEST_ASSERT(numFields == 3);
1223            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1224            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1225            TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1226            TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1227
1228            spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1229            TEST_ASSERT(spaceNeeded == requiredCapacity);
1230        }
1231
1232        /*  Split with just enough output strings available (5) */
1233        status = U_ZERO_ERROR;
1234        memset(fields, -1, sizeof(fields));
1235        numFields =
1236            uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1237        TEST_ASSERT_SUCCESS(status);
1238
1239        /* The TEST_ASSERT_SUCCESS call above should change too... */
1240        if(U_SUCCESS(status)) {
1241            TEST_ASSERT(numFields == 5);
1242            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1243            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1244            TEST_ASSERT_STRING(" second", fields[2], TRUE);
1245            TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1246            TEST_ASSERT_STRING("  third", fields[4], TRUE);
1247            TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1248
1249            spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1250            TEST_ASSERT(spaceNeeded == requiredCapacity);
1251        }
1252
1253        /* Split, end of text is a field delimiter.   */
1254        status = U_ZERO_ERROR;
1255        sz = strlen("first <tag-a> second<tag-b>");
1256        uregex_setText(re, textToSplit, sz, &status);
1257        TEST_ASSERT_SUCCESS(status);
1258
1259        /* The TEST_ASSERT_SUCCESS call above should change too... */
1260        if(U_SUCCESS(status)) {
1261            memset(fields, -1, sizeof(fields));
1262            numFields =
1263                uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1264            TEST_ASSERT_SUCCESS(status);
1265
1266            /* The TEST_ASSERT_SUCCESS call above should change too... */
1267            if(U_SUCCESS(status)) {
1268                TEST_ASSERT(numFields == 5);
1269                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1270                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1271                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1272                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1273                TEST_ASSERT_STRING("",        fields[4], TRUE);
1274                TEST_ASSERT(fields[5] == NULL);
1275                TEST_ASSERT(fields[8] == NULL);
1276                TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1277                spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1278                TEST_ASSERT(spaceNeeded == requiredCapacity);
1279            }
1280        }
1281
1282        uregex_close(re);
1283    }
1284
1285    /*
1286     * set/getTimeLimit
1287     */
1288     TEST_SETUP("abc$", "abcdef", 0);
1289     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1290     uregex_setTimeLimit(re, 1000, &status);
1291     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1292     TEST_ASSERT_SUCCESS(status);
1293     uregex_setTimeLimit(re, -1, &status);
1294     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1295     status = U_ZERO_ERROR;
1296     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1297     TEST_TEARDOWN;
1298
1299     /*
1300      * set/get Stack Limit
1301      */
1302     TEST_SETUP("abc$", "abcdef", 0);
1303     TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1304     uregex_setStackLimit(re, 40000, &status);
1305     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1306     TEST_ASSERT_SUCCESS(status);
1307     uregex_setStackLimit(re, -1, &status);
1308     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1309     status = U_ZERO_ERROR;
1310     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1311     TEST_TEARDOWN;
1312
1313
1314     /*
1315      * Get/Set callback functions
1316      *     This test is copied from intltest regex/Callbacks
1317      *     The pattern and test data will run long enough to cause the callback
1318      *       to be invoked.  The nested '+' operators give exponential time
1319      *       behavior with increasing string length.
1320      */
1321     TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1322     callBackContext cbInfo = {4, 0, 0};
1323     const void     *pContext   = &cbInfo;
1324     URegexMatchCallback    *returnedFn = &TestCallbackFn;
1325
1326     /*  Getting the callback fn when it hasn't been set must return NULL  */
1327     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1328     TEST_ASSERT_SUCCESS(status);
1329     TEST_ASSERT(returnedFn == NULL);
1330     TEST_ASSERT(pContext == NULL);
1331
     /* Set the callback and do a match.                                  */
1333     /* The callback function should record that it has been called.      */
1334     uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1335     TEST_ASSERT_SUCCESS(status);
1336     TEST_ASSERT(cbInfo.numCalls == 0);
1337     TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1338     TEST_ASSERT_SUCCESS(status);
1339     TEST_ASSERT(cbInfo.numCalls > 0);
1340
1341     /* Getting the callback should return the values that were set above.  */
1342     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1343     TEST_ASSERT(returnedFn == &TestCallbackFn);
1344     TEST_ASSERT(pContext == &cbInfo);
1345
1346     TEST_TEARDOWN;
1347}
1348
1349
1350
1351static void TestBug4315(void) {
1352    UErrorCode      theICUError = U_ZERO_ERROR;
1353    URegularExpression *theRegEx;
1354    UChar           *textBuff;
1355    const char      *thePattern;
1356    UChar            theString[100];
1357    UChar           *destFields[24];
1358    int32_t         neededLength1;
1359    int32_t         neededLength2;
1360
1361    int32_t         wordCount = 0;
1362    int32_t         destFieldsSize = 24;
1363
1364    thePattern  = "ck ";
1365    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1366
1367    /* open a regex */
1368    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1369    TEST_ASSERT_SUCCESS(theICUError);
1370
1371    /* set the input string */
1372    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1373    TEST_ASSERT_SUCCESS(theICUError);
1374
1375    /* split */
1376    /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1377     *  error occurs! */
1378    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1379        destFieldsSize, &theICUError);
1380
1381    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1382    TEST_ASSERT(wordCount==3);
1383
1384    if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1385    {
1386        theICUError = U_ZERO_ERROR;
1387        textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1388        wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1389            destFields, destFieldsSize, &theICUError);
1390        TEST_ASSERT(wordCount==3);
1391        TEST_ASSERT_SUCCESS(theICUError);
1392        TEST_ASSERT(neededLength1 == neededLength2);
1393        TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1394        TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1395        TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1396        TEST_ASSERT(destFields[3] == NULL);
1397        free(textBuff);
1398    }
1399    uregex_close(theRegEx);
1400}
1401
1402/* Based on TestRegexCAPI() */
1403static void TestUTextAPI(void) {
1404    UErrorCode           status = U_ZERO_ERROR;
1405    URegularExpression  *re;
1406    UText                patternText = UTEXT_INITIALIZER;
1407    UChar                pat[200];
1408    const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1409
    /* Minimalist open/close */
1411    utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1412    re = uregex_openUText(&patternText, 0, 0, &status);
1413    if (U_FAILURE(status)) {
1414         log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1415         utext_close(&patternText);
1416         return;
1417    }
1418    uregex_close(re);
1419
1420    /* Open with all flag values set */
1421    status = U_ZERO_ERROR;
1422    re = uregex_openUText(&patternText,
1423        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1424        0, &status);
1425    TEST_ASSERT_SUCCESS(status);
1426    uregex_close(re);
1427
1428    /* Open with an invalid flag */
1429    status = U_ZERO_ERROR;
1430    re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1431    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1432    uregex_close(re);
1433
1434    /* open with an invalid parameter */
1435    status = U_ZERO_ERROR;
1436    re = uregex_openUText(NULL,
1437        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1438    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1439
1440    /*
1441     *  clone
1442     */
1443    {
1444        URegularExpression *clone1;
1445        URegularExpression *clone2;
1446        URegularExpression *clone3;
1447        UChar  testString1[30];
1448        UChar  testString2[30];
1449        UBool  result;
1450
1451
1452        status = U_ZERO_ERROR;
1453        re = uregex_openUText(&patternText, 0, 0, &status);
1454        TEST_ASSERT_SUCCESS(status);
1455        clone1 = uregex_clone(re, &status);
1456        TEST_ASSERT_SUCCESS(status);
1457        TEST_ASSERT(clone1 != NULL);
1458
1459        status = U_ZERO_ERROR;
1460        clone2 = uregex_clone(re, &status);
1461        TEST_ASSERT_SUCCESS(status);
1462        TEST_ASSERT(clone2 != NULL);
1463        uregex_close(re);
1464
1465        status = U_ZERO_ERROR;
1466        clone3 = uregex_clone(clone2, &status);
1467        TEST_ASSERT_SUCCESS(status);
1468        TEST_ASSERT(clone3 != NULL);
1469
1470        u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1471        u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1472
1473        status = U_ZERO_ERROR;
1474        uregex_setText(clone1, testString1, -1, &status);
1475        TEST_ASSERT_SUCCESS(status);
1476        result = uregex_lookingAt(clone1, 0, &status);
1477        TEST_ASSERT_SUCCESS(status);
1478        TEST_ASSERT(result==TRUE);
1479
1480        status = U_ZERO_ERROR;
1481        uregex_setText(clone2, testString2, -1, &status);
1482        TEST_ASSERT_SUCCESS(status);
1483        result = uregex_lookingAt(clone2, 0, &status);
1484        TEST_ASSERT_SUCCESS(status);
1485        TEST_ASSERT(result==FALSE);
1486        result = uregex_find(clone2, 0, &status);
1487        TEST_ASSERT_SUCCESS(status);
1488        TEST_ASSERT(result==TRUE);
1489
1490        uregex_close(clone1);
1491        uregex_close(clone2);
1492        uregex_close(clone3);
1493
1494    }
1495
1496    /*
1497     *  pattern() and patternText()
1498     */
1499    {
1500        const UChar  *resultPat;
1501        int32_t       resultLen;
1502        UText        *resultText;
1503        const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1504        const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1505        u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1506        status = U_ZERO_ERROR;
1507
1508        utext_openUTF8(&patternText, str_hello, -1, &status);
1509        re = uregex_open(pat, -1, 0, NULL, &status);
1510        resultPat = uregex_pattern(re, &resultLen, &status);
1511        TEST_ASSERT_SUCCESS(status);
1512
1513        /* The TEST_ASSERT_SUCCESS above should change too... */
1514        if (U_SUCCESS(status)) {
1515            TEST_ASSERT(resultLen == -1);
1516            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1517        }
1518
1519        resultText = uregex_patternUText(re, &status);
1520        TEST_ASSERT_SUCCESS(status);
1521        TEST_ASSERT_UTEXT(str_hello, resultText);
1522
1523        uregex_close(re);
1524
1525        status = U_ZERO_ERROR;
1526        re = uregex_open(pat, 3, 0, NULL, &status);
1527        resultPat = uregex_pattern(re, &resultLen, &status);
1528        TEST_ASSERT_SUCCESS(status);
1529
1530        /* The TEST_ASSERT_SUCCESS above should change too... */
1531        if (U_SUCCESS(status)) {
1532            TEST_ASSERT(resultLen == 3);
1533            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1534            TEST_ASSERT(u_strlen(resultPat) == 3);
1535        }
1536
1537        resultText = uregex_patternUText(re, &status);
1538        TEST_ASSERT_SUCCESS(status);
1539        TEST_ASSERT_UTEXT(str_hel, resultText);
1540
1541        uregex_close(re);
1542    }
1543
1544    /*
1545     *  setUText() and lookingAt()
1546     */
1547    {
1548        UText  text1 = UTEXT_INITIALIZER;
1549        UText  text2 = UTEXT_INITIALIZER;
1550        UBool  result;
1551        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1552        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1553        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1554        status = U_ZERO_ERROR;
1555        utext_openUTF8(&text1, str_abcccd, -1, &status);
1556        utext_openUTF8(&text2, str_abcccxd, -1, &status);
1557
1558        utext_openUTF8(&patternText, str_abcd, -1, &status);
1559        re = uregex_openUText(&patternText, 0, NULL, &status);
1560        TEST_ASSERT_SUCCESS(status);
1561
1562        /* Operation before doing a setText should fail... */
1563        status = U_ZERO_ERROR;
1564        uregex_lookingAt(re, 0, &status);
1565        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1566
1567        status = U_ZERO_ERROR;
1568        uregex_setUText(re, &text1, &status);
1569        result = uregex_lookingAt(re, 0, &status);
1570        TEST_ASSERT(result == TRUE);
1571        TEST_ASSERT_SUCCESS(status);
1572
1573        status = U_ZERO_ERROR;
1574        uregex_setUText(re, &text2, &status);
1575        result = uregex_lookingAt(re, 0, &status);
1576        TEST_ASSERT(result == FALSE);
1577        TEST_ASSERT_SUCCESS(status);
1578
1579        status = U_ZERO_ERROR;
1580        uregex_setUText(re, &text1, &status);
1581        result = uregex_lookingAt(re, 0, &status);
1582        TEST_ASSERT(result == TRUE);
1583        TEST_ASSERT_SUCCESS(status);
1584
1585        uregex_close(re);
1586        utext_close(&text1);
1587        utext_close(&text2);
1588    }
1589
1590
1591    /*
1592     *  getText() and getUText()
1593     */
1594    {
1595        UText  text1 = UTEXT_INITIALIZER;
1596        UText  text2 = UTEXT_INITIALIZER;
1597        UChar  text2Chars[20];
1598        UText  *resultText;
1599        const UChar   *result;
1600        int32_t  textLength;
1601        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1602        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1603        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1604
1605
1606        status = U_ZERO_ERROR;
1607        utext_openUTF8(&text1, str_abcccd, -1, &status);
1608        u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1609        utext_openUChars(&text2, text2Chars, -1, &status);
1610
1611        utext_openUTF8(&patternText, str_abcd, -1, &status);
1612        re = uregex_openUText(&patternText, 0, NULL, &status);
1613
1614        /* First set a UText */
1615        uregex_setUText(re, &text1, &status);
1616        resultText = uregex_getUText(re, NULL, &status);
1617        TEST_ASSERT_SUCCESS(status);
1618        TEST_ASSERT(resultText != &text1);
1619        utext_setNativeIndex(resultText, 0);
1620        utext_setNativeIndex(&text1, 0);
1621        TEST_ASSERT(testUTextEqual(resultText, &text1));
1622        utext_close(resultText);
1623
1624        result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1625        (void)result;    /* Suppress set but not used warning. */
1626        TEST_ASSERT(textLength == -1 || textLength == 6);
1627        resultText = uregex_getUText(re, NULL, &status);
1628        TEST_ASSERT_SUCCESS(status);
1629        TEST_ASSERT(resultText != &text1);
1630        utext_setNativeIndex(resultText, 0);
1631        utext_setNativeIndex(&text1, 0);
1632        TEST_ASSERT(testUTextEqual(resultText, &text1));
1633        utext_close(resultText);
1634
1635        /* Then set a UChar * */
1636        uregex_setText(re, text2Chars, 7, &status);
1637        resultText = uregex_getUText(re, NULL, &status);
1638        TEST_ASSERT_SUCCESS(status);
1639        utext_setNativeIndex(resultText, 0);
1640        utext_setNativeIndex(&text2, 0);
1641        TEST_ASSERT(testUTextEqual(resultText, &text2));
1642        utext_close(resultText);
1643        result = uregex_getText(re, &textLength, &status);
1644        TEST_ASSERT(textLength == 7);
1645
1646        uregex_close(re);
1647        utext_close(&text1);
1648        utext_close(&text2);
1649    }
1650
1651    /*
1652     *  matches()
1653     */
1654    {
1655        UText   text1 = UTEXT_INITIALIZER;
1656        UBool   result;
1657        UText   nullText = UTEXT_INITIALIZER;
1658        const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1659        const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1660
1661        status = U_ZERO_ERROR;
1662        utext_openUTF8(&text1, str_abcccde, -1, &status);
1663        utext_openUTF8(&patternText, str_abcd, -1, &status);
1664        re = uregex_openUText(&patternText, 0, NULL, &status);
1665
1666        uregex_setUText(re, &text1, &status);
1667        result = uregex_matches(re, 0, &status);
1668        TEST_ASSERT(result == FALSE);
1669        TEST_ASSERT_SUCCESS(status);
1670        uregex_close(re);
1671
1672        status = U_ZERO_ERROR;
1673        re = uregex_openC(".?", 0, NULL, &status);
1674        uregex_setUText(re, &text1, &status);
1675        result = uregex_matches(re, 7, &status);
1676        TEST_ASSERT(result == TRUE);
1677        TEST_ASSERT_SUCCESS(status);
1678
1679        status = U_ZERO_ERROR;
1680        utext_openUTF8(&nullText, "", -1, &status);
1681        uregex_setUText(re, &nullText, &status);
1682        TEST_ASSERT_SUCCESS(status);
1683        result = uregex_matches(re, 0, &status);
1684        TEST_ASSERT(result == TRUE);
1685        TEST_ASSERT_SUCCESS(status);
1686
1687        uregex_close(re);
1688        utext_close(&text1);
1689        utext_close(&nullText);
1690    }
1691
1692
1693    /*
1694     *  lookingAt()    Used in setText test.
1695     */
1696
1697
1698    /*
1699     *  find(), findNext, start, end, reset
1700     */
1701    {
1702        UChar    text1[50];
1703        UBool    result;
1704        u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1705        status = U_ZERO_ERROR;
1706        re = uregex_openC("rx", 0, NULL, &status);
1707
1708        uregex_setText(re, text1, -1, &status);
1709        result = uregex_find(re, 0, &status);
1710        TEST_ASSERT(result == TRUE);
1711        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1712        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1713        TEST_ASSERT_SUCCESS(status);
1714
1715        result = uregex_find(re, 9, &status);
1716        TEST_ASSERT(result == TRUE);
1717        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1718        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1719        TEST_ASSERT_SUCCESS(status);
1720
1721        result = uregex_find(re, 14, &status);
1722        TEST_ASSERT(result == FALSE);
1723        TEST_ASSERT_SUCCESS(status);
1724
1725        status = U_ZERO_ERROR;
1726        uregex_reset(re, 0, &status);
1727
1728        result = uregex_findNext(re, &status);
1729        TEST_ASSERT(result == TRUE);
1730        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1731        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1732        TEST_ASSERT_SUCCESS(status);
1733
1734        result = uregex_findNext(re, &status);
1735        TEST_ASSERT(result == TRUE);
1736        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1737        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1738        TEST_ASSERT_SUCCESS(status);
1739
1740        status = U_ZERO_ERROR;
1741        uregex_reset(re, 12, &status);
1742
1743        result = uregex_findNext(re, &status);
1744        TEST_ASSERT(result == TRUE);
1745        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1746        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1747        TEST_ASSERT_SUCCESS(status);
1748
1749        result = uregex_findNext(re, &status);
1750        TEST_ASSERT(result == FALSE);
1751        TEST_ASSERT_SUCCESS(status);
1752
1753        uregex_close(re);
1754    }
1755
1756    /*
1757     *  groupUText()
1758     */
1759    {
1760        UChar    text1[80];
1761        UText   *actual;
1762        UBool    result;
1763        int64_t  groupLen = 0;
1764        UChar    groupBuf[20];
1765
1766        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1767
1768        status = U_ZERO_ERROR;
1769        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1770        TEST_ASSERT_SUCCESS(status);
1771
1772        uregex_setText(re, text1, -1, &status);
1773        result = uregex_find(re, 0, &status);
1774        TEST_ASSERT(result==TRUE);
1775
1776        /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1777        status = U_ZERO_ERROR;
1778        actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1779        TEST_ASSERT_SUCCESS(status);
1780
1781        TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1782        TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1783        utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1784
1785        TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1786        utext_close(actual);
1787
1788        /*  Capture group #1.  Should succeed. */
1789        status = U_ZERO_ERROR;
1790
1791        actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1792        TEST_ASSERT_SUCCESS(status);
1793        TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1794                                                           /*    (within the string text1)           */
1795        TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1796        utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1797        TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1798
1799        utext_close(actual);
1800
1801        /*  Capture group out of range.  Error. */
1802        status = U_ZERO_ERROR;
1803        actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1804        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1805        utext_close(actual);
1806
1807        uregex_close(re);
1808    }
1809
1810    /*
1811     *  replaceFirst()
1812     */
1813    {
1814        UChar    text1[80];
1815        UChar    text2[80];
1816        UText    replText = UTEXT_INITIALIZER;
1817        UText   *result;
1818        const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1819        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1820        const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1821               0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1822        const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1823        const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1824        status = U_ZERO_ERROR;
1825        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1826        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1827        utext_openUTF8(&replText, str_1x, -1, &status);
1828
1829        re = uregex_openC("x(.*?)x", 0, NULL, &status);
1830        TEST_ASSERT_SUCCESS(status);
1831
1832        /*  Normal case, with match */
1833        uregex_setText(re, text1, -1, &status);
1834        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1835        TEST_ASSERT_SUCCESS(status);
1836        TEST_ASSERT_UTEXT(str_Replxxx, result);
1837        utext_close(result);
1838
1839        /* No match.  Text should copy to output with no changes.  */
1840        uregex_setText(re, text2, -1, &status);
1841        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1842        TEST_ASSERT_SUCCESS(status);
1843        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1844        utext_close(result);
1845
1846        /* Unicode escapes */
1847        uregex_setText(re, text1, -1, &status);
1848        utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1849        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1850        TEST_ASSERT_SUCCESS(status);
1851        TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1852        utext_close(result);
1853
1854        uregex_close(re);
1855        utext_close(&replText);
1856    }
1857
1858
1859    /*
1860     *  replaceAll()
1861     */
1862    {
1863        UChar    text1[80];
1864        UChar    text2[80];
1865        UText    replText = UTEXT_INITIALIZER;
1866        UText   *result;
1867        const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1868        const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1869        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1870        status = U_ZERO_ERROR;
1871        u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1872        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1873        utext_openUTF8(&replText, str_1, -1, &status);
1874
1875        re = uregex_openC("x(.*?)x", 0, NULL, &status);
1876        TEST_ASSERT_SUCCESS(status);
1877
1878        /*  Normal case, with match */
1879        uregex_setText(re, text1, -1, &status);
1880        result = uregex_replaceAllUText(re, &replText, NULL, &status);
1881        TEST_ASSERT_SUCCESS(status);
1882        TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1883        utext_close(result);
1884
1885        /* No match.  Text should copy to output with no changes.  */
1886        uregex_setText(re, text2, -1, &status);
1887        result = uregex_replaceAllUText(re, &replText, NULL, &status);
1888        TEST_ASSERT_SUCCESS(status);
1889        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1890        utext_close(result);
1891
1892        uregex_close(re);
1893        utext_close(&replText);
1894    }
1895
1896
1897    /*
1898     *  appendReplacement()
1899     */
1900    {
1901        UChar    text[100];
1902        UChar    repl[100];
1903        UChar    buf[100];
1904        UChar   *bufPtr;
1905        int32_t  bufCap;
1906
1907        status = U_ZERO_ERROR;
1908        re = uregex_openC(".*", 0, 0, &status);
1909        TEST_ASSERT_SUCCESS(status);
1910
1911        u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1912        u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1913        uregex_setText(re, text, -1, &status);
1914
1915        /* match covers whole target string */
1916        uregex_find(re, 0, &status);
1917        TEST_ASSERT_SUCCESS(status);
1918        bufPtr = buf;
1919        bufCap = UPRV_LENGTHOF(buf);
1920        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1921        TEST_ASSERT_SUCCESS(status);
1922        TEST_ASSERT_STRING("some other", buf, TRUE);
1923
1924        /* Match has \u \U escapes */
1925        uregex_find(re, 0, &status);
1926        TEST_ASSERT_SUCCESS(status);
1927        bufPtr = buf;
1928        bufCap = UPRV_LENGTHOF(buf);
1929        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1930        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1931        TEST_ASSERT_SUCCESS(status);
1932        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1933
1934        uregex_close(re);
1935    }
1936
1937
1938    /*
1939     *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1940     */
1941
1942    /*
1943     *  splitUText()
1944     */
1945    {
1946        UChar    textToSplit[80];
1947        UChar    text2[80];
1948        UText    *fields[10];
1949        int32_t  numFields;
1950        int32_t i;
1951
1952        u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1953        u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1954
1955        status = U_ZERO_ERROR;
1956        re = uregex_openC(":", 0, NULL, &status);
1957
1958
1959        /*  Simple split */
1960
1961        uregex_setText(re, textToSplit, -1, &status);
1962        TEST_ASSERT_SUCCESS(status);
1963
1964        /* The TEST_ASSERT_SUCCESS call above should change too... */
1965        if (U_SUCCESS(status)) {
1966            memset(fields, 0, sizeof(fields));
1967            numFields = uregex_splitUText(re, fields, 10, &status);
1968            TEST_ASSERT_SUCCESS(status);
1969
1970            /* The TEST_ASSERT_SUCCESS call above should change too... */
1971            if(U_SUCCESS(status)) {
1972              const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1973              const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1974              const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1975                TEST_ASSERT(numFields == 3);
1976                TEST_ASSERT_UTEXT(str_first,  fields[0]);
1977                TEST_ASSERT_UTEXT(str_second, fields[1]);
1978                TEST_ASSERT_UTEXT(str_third, fields[2]);
1979                TEST_ASSERT(fields[3] == NULL);
1980            }
1981            for(i = 0; i < numFields; i++) {
1982                utext_close(fields[i]);
1983            }
1984        }
1985
1986        uregex_close(re);
1987
1988
1989        /*  Split with too few output strings available */
1990        status = U_ZERO_ERROR;
1991        re = uregex_openC(":", 0, NULL, &status);
1992        uregex_setText(re, textToSplit, -1, &status);
1993        TEST_ASSERT_SUCCESS(status);
1994
1995        /* The TEST_ASSERT_SUCCESS call above should change too... */
1996        if(U_SUCCESS(status)) {
1997            fields[0] = NULL;
1998            fields[1] = NULL;
1999            fields[2] = &patternText;
2000            numFields = uregex_splitUText(re, fields, 2, &status);
2001            TEST_ASSERT_SUCCESS(status);
2002
2003            /* The TEST_ASSERT_SUCCESS call above should change too... */
2004            if(U_SUCCESS(status)) {
2005                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2006                const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2007                TEST_ASSERT(numFields == 2);
2008                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2009                TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2010                TEST_ASSERT(fields[2] == &patternText);
2011            }
2012            for(i = 0; i < numFields; i++) {
2013                utext_close(fields[i]);
2014            }
2015        }
2016
2017        uregex_close(re);
2018    }
2019
2020    /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2021     *                   comes out as additional fields.  */
2022    {
2023        UChar    textToSplit[80];
2024        UText    *fields[10];
2025        int32_t  numFields;
2026        int32_t i;
2027
2028        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2029
2030        status = U_ZERO_ERROR;
2031        re = uregex_openC("<(.*?)>", 0, NULL, &status);
2032
2033        uregex_setText(re, textToSplit, -1, &status);
2034        TEST_ASSERT_SUCCESS(status);
2035
2036        /* The TEST_ASSERT_SUCCESS call above should change too... */
2037        if(U_SUCCESS(status)) {
2038            memset(fields, 0, sizeof(fields));
2039            numFields = uregex_splitUText(re, fields, 10, &status);
2040            TEST_ASSERT_SUCCESS(status);
2041
2042            /* The TEST_ASSERT_SUCCESS call above should change too... */
2043            if(U_SUCCESS(status)) {
2044                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2045                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2046                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2047                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2048                const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2049
2050                TEST_ASSERT(numFields == 5);
2051                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2052                TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2053                TEST_ASSERT_UTEXT(str_second, fields[2]);
2054                TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2055                TEST_ASSERT_UTEXT(str_third, fields[4]);
2056                TEST_ASSERT(fields[5] == NULL);
2057            }
2058            for(i = 0; i < numFields; i++) {
2059                utext_close(fields[i]);
2060            }
2061        }
2062
2063        /*  Split with too few output strings available (2) */
2064        status = U_ZERO_ERROR;
2065        fields[0] = NULL;
2066        fields[1] = NULL;
2067        fields[2] = &patternText;
2068        numFields = uregex_splitUText(re, fields, 2, &status);
2069        TEST_ASSERT_SUCCESS(status);
2070
2071        /* The TEST_ASSERT_SUCCESS call above should change too... */
2072        if(U_SUCCESS(status)) {
2073            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2074            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2075            TEST_ASSERT(numFields == 2);
2076            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2077            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2078            TEST_ASSERT(fields[2] == &patternText);
2079        }
2080        for(i = 0; i < numFields; i++) {
2081            utext_close(fields[i]);
2082        }
2083
2084
2085        /*  Split with too few output strings available (3) */
2086        status = U_ZERO_ERROR;
2087        fields[0] = NULL;
2088        fields[1] = NULL;
2089        fields[2] = NULL;
2090        fields[3] = &patternText;
2091        numFields = uregex_splitUText(re, fields, 3, &status);
2092        TEST_ASSERT_SUCCESS(status);
2093
2094        /* The TEST_ASSERT_SUCCESS call above should change too... */
2095        if(U_SUCCESS(status)) {
2096            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2097            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2098            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2099            TEST_ASSERT(numFields == 3);
2100            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2101            TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2102            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2103            TEST_ASSERT(fields[3] == &patternText);
2104        }
2105        for(i = 0; i < numFields; i++) {
2106            utext_close(fields[i]);
2107        }
2108
2109        /*  Split with just enough output strings available (5) */
2110        status = U_ZERO_ERROR;
2111        fields[0] = NULL;
2112        fields[1] = NULL;
2113        fields[2] = NULL;
2114        fields[3] = NULL;
2115        fields[4] = NULL;
2116        fields[5] = &patternText;
2117        numFields = uregex_splitUText(re, fields, 5, &status);
2118        TEST_ASSERT_SUCCESS(status);
2119
2120        /* The TEST_ASSERT_SUCCESS call above should change too... */
2121        if(U_SUCCESS(status)) {
2122            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2123            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2124            const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2125            const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2126            const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2127
2128            TEST_ASSERT(numFields == 5);
2129            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2130            TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2131            TEST_ASSERT_UTEXT(str_second, fields[2]);
2132            TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2133            TEST_ASSERT_UTEXT(str_third, fields[4]);
2134            TEST_ASSERT(fields[5] == &patternText);
2135        }
2136        for(i = 0; i < numFields; i++) {
2137            utext_close(fields[i]);
2138        }
2139
2140        /* Split, end of text is a field delimiter.   */
2141        status = U_ZERO_ERROR;
2142        uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2143        TEST_ASSERT_SUCCESS(status);
2144
2145        /* The TEST_ASSERT_SUCCESS call above should change too... */
2146        if(U_SUCCESS(status)) {
2147            memset(fields, 0, sizeof(fields));
2148            fields[9] = &patternText;
2149            numFields = uregex_splitUText(re, fields, 9, &status);
2150            TEST_ASSERT_SUCCESS(status);
2151
2152            /* The TEST_ASSERT_SUCCESS call above should change too... */
2153            if(U_SUCCESS(status)) {
2154                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2155                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2156                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2157                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2158                const char str_empty[] = { 0x00 };
2159
2160                TEST_ASSERT(numFields == 5);
2161                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2162                TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2163                TEST_ASSERT_UTEXT(str_second, fields[2]);
2164                TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2165                TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2166                TEST_ASSERT(fields[5] == NULL);
2167                TEST_ASSERT(fields[8] == NULL);
2168                TEST_ASSERT(fields[9] == &patternText);
2169            }
2170            for(i = 0; i < numFields; i++) {
2171                utext_close(fields[i]);
2172            }
2173        }
2174
2175        uregex_close(re);
2176    }
2177    utext_close(&patternText);
2178}
2179
2180
2181static void TestRefreshInput(void) {
2182    /*
2183     *  RefreshInput changes out the input of a URegularExpression without
2184     *    changing anything else in the match state.  Used with Java JNI,
2185     *    when Java moves the underlying string storage.   This test
2186     *    runs a find() loop, moving the text after the first match.
2187     *    The right number of matches should still be found.
2188     */
2189    UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2190    UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2191    UErrorCode status = U_ZERO_ERROR;
2192    URegularExpression *re;
2193    UText ut1 = UTEXT_INITIALIZER;
2194    UText ut2 = UTEXT_INITIALIZER;
2195
2196    re = uregex_openC("[ABC]", 0, 0, &status);
2197    TEST_ASSERT_SUCCESS(status);
2198
2199    utext_openUChars(&ut1, testStr, -1, &status);
2200    TEST_ASSERT_SUCCESS(status);
2201    uregex_setUText(re, &ut1, &status);
2202    TEST_ASSERT_SUCCESS(status);
2203
2204    /* Find the first match "A" in the original string */
2205    TEST_ASSERT(uregex_findNext(re, &status));
2206    TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2207
2208    /* Move the string, kill the original string.  */
2209    u_strcpy(movedStr, testStr);
2210    u_memset(testStr, 0, u_strlen(testStr));
2211    utext_openUChars(&ut2, movedStr, -1, &status);
2212    TEST_ASSERT_SUCCESS(status);
2213    uregex_refreshUText(re, &ut2, &status);
2214    TEST_ASSERT_SUCCESS(status);
2215
2216    /* Find the following two matches, now working in the moved string. */
2217    TEST_ASSERT(uregex_findNext(re, &status));
2218    TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2219    TEST_ASSERT(uregex_findNext(re, &status));
2220    TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2221    TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2222
2223    uregex_close(re);
2224}
2225
2226
2227static void TestBug8421(void) {
2228    /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2229     *             was failing.
2230     */
2231    URegularExpression *re;
2232    UErrorCode status = U_ZERO_ERROR;
2233    int32_t  limit = -1;
2234
2235    re = uregex_openC("abc", 0, 0, &status);
2236    TEST_ASSERT_SUCCESS(status);
2237
2238    limit = uregex_getTimeLimit(re, &status);
2239    TEST_ASSERT_SUCCESS(status);
2240    TEST_ASSERT(limit == 0);
2241
2242    uregex_setTimeLimit(re, 100, &status);
2243    TEST_ASSERT_SUCCESS(status);
2244    limit = uregex_getTimeLimit(re, &status);
2245    TEST_ASSERT_SUCCESS(status);
2246    TEST_ASSERT(limit == 100);
2247
2248    uregex_close(re);
2249}
2250
2251static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2252    return FALSE;
2253}
2254
2255static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2256    return FALSE;
2257}
2258
2259static void TestBug10815() {
2260  /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2261   *              when the callback function specified by uregex_setMatchCallback() returns FALSE
2262   */
2263    URegularExpression *re;
2264    UErrorCode status = U_ZERO_ERROR;
2265    UChar    text[100];
2266
2267
2268    // findNext() with a find progress callback function.
2269
2270    re = uregex_openC(".z", 0, 0, &status);
2271    TEST_ASSERT_SUCCESS(status);
2272
2273    u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2274    uregex_setText(re, text, -1, &status);
2275    TEST_ASSERT_SUCCESS(status);
2276
2277    uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2278    TEST_ASSERT_SUCCESS(status);
2279
2280    uregex_findNext(re, &status);
2281    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2282
2283    uregex_close(re);
2284
2285    // findNext() with a match progress callback function.
2286
2287    status = U_ZERO_ERROR;
2288    re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2289    TEST_ASSERT_SUCCESS(status);
2290
2291    // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2292    // it will appear to be stuck in a (near) infinite loop.
2293    u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2294    uregex_setText(re, text, -1, &status);
2295    TEST_ASSERT_SUCCESS(status);
2296
2297    uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2298    TEST_ASSERT_SUCCESS(status);
2299
2300    uregex_findNext(re, &status);
2301    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2302
2303    uregex_close(re);
2304}
2305
2306
2307#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2308