1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2004-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File reapits.c
9*
10*********************************************************************************/
11/*C API TEST FOR Regular Expressions */
12/**
13*   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
14*   try to test the full functionality.  It just calls each function and verifies that it
15*   works on a basic level.
16*
17*   More complete testing of regular expression functionality is done with the C++ tests.
18**/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_REGULAR_EXPRESSIONS
23
24#include <stdlib.h>
25#include <string.h>
26#include "unicode/uloc.h"
27#include "unicode/uregex.h"
28#include "unicode/ustring.h"
29#include "unicode/utext.h"
30#include "cintltst.h"
31
/*
 * TEST_ASSERT_SUCCESS(status)
 *     When status holds an ICU failure code, report it through log_data_err()
 *     together with the source file, the line of the invocation and the
 *     symbolic error name.  Nothing is logged for success or warning codes.
 *
 *     The expansion is a brace-enclosed block, so the macro works both with
 *     and without a trailing semicolon.  Note that status is evaluated twice.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
34
/*
 * TEST_ASSERT(expr)
 *     Report a test failure through log_data_err(), with the source file and
 *     the line of the invocation, when expr evaluates to zero (FALSE).
 *
 *     The expansion is a brace-enclosed block rather than a do/while(0)
 *     statement; callers in this file rely on being able to omit the
 *     trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
37
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         Macros to handle the boilerplate around setting up regex test cases.
 *         Parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         The enclosing function must declare "status" (UErrorCode) and
 *         "re" (URegularExpression *); both are assigned by TEST_SETUP.
 *         TEST_SETUP opens two brace levels (an outer scope holding the
 *         heap-allocated UChar copy of testString, and an inner
 *         "if (U_SUCCESS(status))" body); TEST_TEARDOWN closes both, so the
 *         two macros must always be used as a pair.  The test code placed
 *         between them runs only when setup succeeded.
 *
 *         If the UChar copy of testString cannot be allocated, status is set
 *         to U_MEMORY_ALLOCATION_ERROR (unless it already holds a failure),
 *         the failure is logged, and the test code is skipped.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString,  strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
66
67
68/**
69 * @param expected utf-8 array of bytes to be expected
70 */
71static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
72     char     buf_inside_macro[120];
73     int32_t  len = (int32_t)strlen(expected);
74     UBool    success;
75     if (nulTerm) {
76         u_austrncpy(buf_inside_macro, (actual), len+1);
77         buf_inside_macro[len+2] = 0;
78         success = (strcmp((expected), buf_inside_macro) == 0);
79     } else {
80         u_austrncpy(buf_inside_macro, (actual), len);
81         buf_inside_macro[len+1] = 0;
82         success = (strncmp((expected), buf_inside_macro, len) == 0);
83     }
84     if (success == FALSE) {
85         log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
86             file, line, (expected), buf_inside_macro);
87     }
88}
89
90#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
91
92
93static UBool equals_utf8_utext(const char *utf8, UText *utext) {
94    int32_t u8i = 0;
95    UChar32 u8c = 0;
96    UChar32 utc = 0;
97    UBool   stringsEqual = TRUE;
98    utext_setNativeIndex(utext, 0);
99    for (;;) {
100        U8_NEXT_UNSAFE(utf8, u8i, u8c);
101        utc = utext_next32(utext);
102        if (u8c == 0 && utc == U_SENTINEL) {
103            break;
104        }
105        if (u8c != utc || u8c == 0) {
106            stringsEqual = FALSE;
107            break;
108        }
109    }
110    return stringsEqual;
111}
112
113
114static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
115    utext_setNativeIndex(actual, 0);
116    if (!equals_utf8_utext(expected, actual)) {
117        UChar32 c;
118        log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
119        c = utext_next32From(actual, 0);
120        while (c != U_SENTINEL) {
121            if (0x20<c && c <0x7e) {
122                log_err("%c", c);
123            } else {
124                log_err("%#x", c);
125            }
126            c = UTEXT_NEXT32(actual);
127        }
128        log_err("\"\n");
129    }
130}
131
132/*
133 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
134 *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
135 */
136#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
137
138static UBool testUTextEqual(UText *uta, UText *utb) {
139    UChar32 ca = 0;
140    UChar32 cb = 0;
141    utext_setNativeIndex(uta, 0);
142    utext_setNativeIndex(utb, 0);
143    do {
144        ca = utext_next32(uta);
145        cb = utext_next32(utb);
146        if (ca != cb) {
147            break;
148        }
149    } while (ca != U_SENTINEL);
150    return ca == cb;
151}
152
153
154
155
156static void TestRegexCAPI(void);
157static void TestBug4315(void);
158static void TestUTextAPI(void);
159static void TestRefreshInput(void);
160static void TestBug8421(void);
161
162void addURegexTest(TestNode** root);
163
164void addURegexTest(TestNode** root)
165{
166    addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
167    addTest(root, &TestBug4315,   "regex/TestBug4315");
168    addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
169    addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
170    addTest(root, &TestBug8421,   "regex/TestBug8421");
171}
172
173/*
174 * Call back function and context struct used for testing
175 *    regular expression user callbacks.  This test is mostly the same as
176 *   the corresponding C++ test in intltest.
177 */
178typedef struct callBackContext {
179    int32_t          maxCalls;
180    int32_t          numCalls;
181    int32_t          lastSteps;
182} callBackContext;
183
184static UBool U_EXPORT2 U_CALLCONV
185TestCallbackFn(const void *context, int32_t steps) {
186  callBackContext  *info = (callBackContext *)context;
187  if (info->lastSteps+1 != steps) {
188      log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
189  }
190  info->lastSteps = steps;
191  info->numCalls++;
192  return (info->numCalls < info->maxCalls);
193}
194
195/*
196 *   Regular Expression C API Tests
197 */
198static void TestRegexCAPI(void) {
199    UErrorCode           status = U_ZERO_ERROR;
200    URegularExpression  *re;
201    UChar                pat[200];
202    UChar               *minus1;
203
204    memset(&minus1, -1, sizeof(minus1));
205
206    /* Mimimalist open/close */
207    u_uastrncpy(pat, "abc*", sizeof(pat)/2);
208    re = uregex_open(pat, -1, 0, 0, &status);
209    if (U_FAILURE(status)) {
210         log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
211         return;
212    }
213    uregex_close(re);
214
215    /* Open with all flag values set */
216    status = U_ZERO_ERROR;
217    re = uregex_open(pat, -1,
218        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
219        0, &status);
220    TEST_ASSERT_SUCCESS(status);
221    uregex_close(re);
222
223    /* Open with an invalid flag */
224    status = U_ZERO_ERROR;
225    re = uregex_open(pat, -1, 0x40000000, 0, &status);
226    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
227    uregex_close(re);
228
229    /* Open with an unimplemented flag */
230    status = U_ZERO_ERROR;
231    re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
232    TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
233    uregex_close(re);
234
235    /* openC with an invalid parameter */
236    status = U_ZERO_ERROR;
237    re = uregex_openC(NULL,
238        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
239    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
240
241    /* openC with an invalid parameter */
242    status = U_USELESS_COLLATOR_ERROR;
243    re = uregex_openC(NULL,
244        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
245    TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
246
247    /* openC   open from a C string */
248    {
249        const UChar   *p;
250        int32_t  len;
251        status = U_ZERO_ERROR;
252        re = uregex_openC("abc*", 0, 0, &status);
253        TEST_ASSERT_SUCCESS(status);
254        p = uregex_pattern(re, &len, &status);
255        TEST_ASSERT_SUCCESS(status);
256
257        /* The TEST_ASSERT_SUCCESS above should change too... */
258        if(U_SUCCESS(status)) {
259            u_uastrncpy(pat, "abc*", sizeof(pat)/2);
260            TEST_ASSERT(u_strcmp(pat, p) == 0);
261            TEST_ASSERT(len==(int32_t)strlen("abc*"));
262        }
263
264        uregex_close(re);
265
266        /*  TODO:  Open with ParseError parameter */
267    }
268
269    /*
270     *  clone
271     */
272    {
273        URegularExpression *clone1;
274        URegularExpression *clone2;
275        URegularExpression *clone3;
276        UChar  testString1[30];
277        UChar  testString2[30];
278        UBool  result;
279
280
281        status = U_ZERO_ERROR;
282        re = uregex_openC("abc*", 0, 0, &status);
283        TEST_ASSERT_SUCCESS(status);
284        clone1 = uregex_clone(re, &status);
285        TEST_ASSERT_SUCCESS(status);
286        TEST_ASSERT(clone1 != NULL);
287
288        status = U_ZERO_ERROR;
289        clone2 = uregex_clone(re, &status);
290        TEST_ASSERT_SUCCESS(status);
291        TEST_ASSERT(clone2 != NULL);
292        uregex_close(re);
293
294        status = U_ZERO_ERROR;
295        clone3 = uregex_clone(clone2, &status);
296        TEST_ASSERT_SUCCESS(status);
297        TEST_ASSERT(clone3 != NULL);
298
299        u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
300        u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
301
302        status = U_ZERO_ERROR;
303        uregex_setText(clone1, testString1, -1, &status);
304        TEST_ASSERT_SUCCESS(status);
305        result = uregex_lookingAt(clone1, 0, &status);
306        TEST_ASSERT_SUCCESS(status);
307        TEST_ASSERT(result==TRUE);
308
309        status = U_ZERO_ERROR;
310        uregex_setText(clone2, testString2, -1, &status);
311        TEST_ASSERT_SUCCESS(status);
312        result = uregex_lookingAt(clone2, 0, &status);
313        TEST_ASSERT_SUCCESS(status);
314        TEST_ASSERT(result==FALSE);
315        result = uregex_find(clone2, 0, &status);
316        TEST_ASSERT_SUCCESS(status);
317        TEST_ASSERT(result==TRUE);
318
319        uregex_close(clone1);
320        uregex_close(clone2);
321        uregex_close(clone3);
322
323    }
324
325    /*
326     *  pattern()
327    */
328    {
329        const UChar  *resultPat;
330        int32_t       resultLen;
331        u_uastrncpy(pat, "hello", sizeof(pat)/2);
332        status = U_ZERO_ERROR;
333        re = uregex_open(pat, -1, 0, NULL, &status);
334        resultPat = uregex_pattern(re, &resultLen, &status);
335        TEST_ASSERT_SUCCESS(status);
336
337        /* The TEST_ASSERT_SUCCESS above should change too... */
338        if (U_SUCCESS(status)) {
339            TEST_ASSERT(resultLen == -1);
340            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
341        }
342
343        uregex_close(re);
344
345        status = U_ZERO_ERROR;
346        re = uregex_open(pat, 3, 0, NULL, &status);
347        resultPat = uregex_pattern(re, &resultLen, &status);
348        TEST_ASSERT_SUCCESS(status);
349        TEST_ASSERT_SUCCESS(status);
350
351        /* The TEST_ASSERT_SUCCESS above should change too... */
352        if (U_SUCCESS(status)) {
353            TEST_ASSERT(resultLen == 3);
354            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
355            TEST_ASSERT(u_strlen(resultPat) == 3);
356        }
357
358        uregex_close(re);
359    }
360
361    /*
362     *  flags()
363     */
364    {
365        int32_t  t;
366
367        status = U_ZERO_ERROR;
368        re = uregex_open(pat, -1, 0, NULL, &status);
369        t  = uregex_flags(re, &status);
370        TEST_ASSERT_SUCCESS(status);
371        TEST_ASSERT(t == 0);
372        uregex_close(re);
373
374        status = U_ZERO_ERROR;
375        re = uregex_open(pat, -1, 0, NULL, &status);
376        t  = uregex_flags(re, &status);
377        TEST_ASSERT_SUCCESS(status);
378        TEST_ASSERT(t == 0);
379        uregex_close(re);
380
381        status = U_ZERO_ERROR;
382        re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
383        t  = uregex_flags(re, &status);
384        TEST_ASSERT_SUCCESS(status);
385        TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
386        uregex_close(re);
387    }
388
389    /*
390     *  setText() and lookingAt()
391     */
392    {
393        UChar  text1[50];
394        UChar  text2[50];
395        UBool  result;
396
397        u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
398        u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
399        status = U_ZERO_ERROR;
400        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
401        re = uregex_open(pat, -1, 0, NULL, &status);
402        TEST_ASSERT_SUCCESS(status);
403
404        /* Operation before doing a setText should fail... */
405        status = U_ZERO_ERROR;
406        uregex_lookingAt(re, 0, &status);
407        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
408
409        status = U_ZERO_ERROR;
410        uregex_setText(re, text1, -1, &status);
411        result = uregex_lookingAt(re, 0, &status);
412        TEST_ASSERT(result == TRUE);
413        TEST_ASSERT_SUCCESS(status);
414
415        status = U_ZERO_ERROR;
416        uregex_setText(re, text2, -1, &status);
417        result = uregex_lookingAt(re, 0, &status);
418        TEST_ASSERT(result == FALSE);
419        TEST_ASSERT_SUCCESS(status);
420
421        status = U_ZERO_ERROR;
422        uregex_setText(re, text1, -1, &status);
423        result = uregex_lookingAt(re, 0, &status);
424        TEST_ASSERT(result == TRUE);
425        TEST_ASSERT_SUCCESS(status);
426
427        status = U_ZERO_ERROR;
428        uregex_setText(re, text1, 5, &status);
429        result = uregex_lookingAt(re, 0, &status);
430        TEST_ASSERT(result == FALSE);
431        TEST_ASSERT_SUCCESS(status);
432
433        status = U_ZERO_ERROR;
434        uregex_setText(re, text1, 6, &status);
435        result = uregex_lookingAt(re, 0, &status);
436        TEST_ASSERT(result == TRUE);
437        TEST_ASSERT_SUCCESS(status);
438
439        uregex_close(re);
440    }
441
442
443    /*
444     *  getText()
445     */
446    {
447        UChar    text1[50];
448        UChar    text2[50];
449        const UChar   *result;
450        int32_t  textLength;
451
452        u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
453        u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
454        status = U_ZERO_ERROR;
455        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
456        re = uregex_open(pat, -1, 0, NULL, &status);
457
458        uregex_setText(re, text1, -1, &status);
459        result = uregex_getText(re, &textLength, &status);
460        TEST_ASSERT(result == text1);
461        TEST_ASSERT(textLength == -1);
462        TEST_ASSERT_SUCCESS(status);
463
464        status = U_ZERO_ERROR;
465        uregex_setText(re, text2, 7, &status);
466        result = uregex_getText(re, &textLength, &status);
467        TEST_ASSERT(result == text2);
468        TEST_ASSERT(textLength == 7);
469        TEST_ASSERT_SUCCESS(status);
470
471        status = U_ZERO_ERROR;
472        uregex_setText(re, text2, 4, &status);
473        result = uregex_getText(re, &textLength, &status);
474        TEST_ASSERT(result == text2);
475        TEST_ASSERT(textLength == 4);
476        TEST_ASSERT_SUCCESS(status);
477        uregex_close(re);
478    }
479
480    /*
481     *  matches()
482     */
483    {
484        UChar   text1[50];
485        UBool   result;
486        int     len;
487        UChar   nullString[] = {0,0,0};
488
489        u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
490        status = U_ZERO_ERROR;
491        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
492        re = uregex_open(pat, -1, 0, NULL, &status);
493
494        uregex_setText(re, text1, -1, &status);
495        result = uregex_matches(re, 0, &status);
496        TEST_ASSERT(result == FALSE);
497        TEST_ASSERT_SUCCESS(status);
498
499        status = U_ZERO_ERROR;
500        uregex_setText(re, text1, 6, &status);
501        result = uregex_matches(re, 0, &status);
502        TEST_ASSERT(result == TRUE);
503        TEST_ASSERT_SUCCESS(status);
504
505        status = U_ZERO_ERROR;
506        uregex_setText(re, text1, 6, &status);
507        result = uregex_matches(re, 1, &status);
508        TEST_ASSERT(result == FALSE);
509        TEST_ASSERT_SUCCESS(status);
510        uregex_close(re);
511
512        status = U_ZERO_ERROR;
513        re = uregex_openC(".?", 0, NULL, &status);
514        uregex_setText(re, text1, -1, &status);
515        len = u_strlen(text1);
516        result = uregex_matches(re, len, &status);
517        TEST_ASSERT(result == TRUE);
518        TEST_ASSERT_SUCCESS(status);
519
520        status = U_ZERO_ERROR;
521        uregex_setText(re, nullString, -1, &status);
522        TEST_ASSERT_SUCCESS(status);
523        result = uregex_matches(re, 0, &status);
524        TEST_ASSERT(result == TRUE);
525        TEST_ASSERT_SUCCESS(status);
526        uregex_close(re);
527    }
528
529
530    /*
531     *  lookingAt()    Used in setText test.
532     */
533
534
535    /*
536     *  find(), findNext, start, end, reset
537     */
538    {
539        UChar    text1[50];
540        UBool    result;
541        u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
542        status = U_ZERO_ERROR;
543        re = uregex_openC("rx", 0, NULL, &status);
544
545        uregex_setText(re, text1, -1, &status);
546        result = uregex_find(re, 0, &status);
547        TEST_ASSERT(result == TRUE);
548        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
549        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
550        TEST_ASSERT_SUCCESS(status);
551
552        result = uregex_find(re, 9, &status);
553        TEST_ASSERT(result == TRUE);
554        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
555        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
556        TEST_ASSERT_SUCCESS(status);
557
558        result = uregex_find(re, 14, &status);
559        TEST_ASSERT(result == FALSE);
560        TEST_ASSERT_SUCCESS(status);
561
562        status = U_ZERO_ERROR;
563        uregex_reset(re, 0, &status);
564
565        result = uregex_findNext(re, &status);
566        TEST_ASSERT(result == TRUE);
567        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
568        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
569        TEST_ASSERT_SUCCESS(status);
570
571        result = uregex_findNext(re, &status);
572        TEST_ASSERT(result == TRUE);
573        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
574        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
575        TEST_ASSERT_SUCCESS(status);
576
577        status = U_ZERO_ERROR;
578        uregex_reset(re, 12, &status);
579
580        result = uregex_findNext(re, &status);
581        TEST_ASSERT(result == TRUE);
582        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
583        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
584        TEST_ASSERT_SUCCESS(status);
585
586        result = uregex_findNext(re, &status);
587        TEST_ASSERT(result == FALSE);
588        TEST_ASSERT_SUCCESS(status);
589
590        uregex_close(re);
591    }
592
593    /*
594     *  groupCount
595     */
596    {
597        int32_t result;
598
599        status = U_ZERO_ERROR;
600        re = uregex_openC("abc", 0, NULL, &status);
601        result = uregex_groupCount(re, &status);
602        TEST_ASSERT_SUCCESS(status);
603        TEST_ASSERT(result == 0);
604        uregex_close(re);
605
606        status = U_ZERO_ERROR;
607        re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
608        result = uregex_groupCount(re, &status);
609        TEST_ASSERT_SUCCESS(status);
610        TEST_ASSERT(result == 3);
611        uregex_close(re);
612
613    }
614
615
616    /*
617     *  group()
618     */
619    {
620        UChar    text1[80];
621        UChar    buf[80];
622        UBool    result;
623        int32_t  resultSz;
624        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
625
626        status = U_ZERO_ERROR;
627        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
628        TEST_ASSERT_SUCCESS(status);
629
630
631        uregex_setText(re, text1, -1, &status);
632        result = uregex_find(re, 0, &status);
633        TEST_ASSERT(result==TRUE);
634
635        /*  Capture Group 0, the full match.  Should succeed.  */
636        status = U_ZERO_ERROR;
637        resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
638        TEST_ASSERT_SUCCESS(status);
639        TEST_ASSERT_STRING("abc interior def", buf, TRUE);
640        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
641
642        /*  Capture group #1.  Should succeed. */
643        status = U_ZERO_ERROR;
644        resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
645        TEST_ASSERT_SUCCESS(status);
646        TEST_ASSERT_STRING(" interior ", buf, TRUE);
647        TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
648
649        /*  Capture group out of range.  Error. */
650        status = U_ZERO_ERROR;
651        uregex_group(re, 2, buf, sizeof(buf)/2, &status);
652        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
653
654        /* NULL buffer, pure pre-flight */
655        status = U_ZERO_ERROR;
656        resultSz = uregex_group(re, 0, NULL, 0, &status);
657        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
658        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
659
660        /* Too small buffer, truncated string */
661        status = U_ZERO_ERROR;
662        memset(buf, -1, sizeof(buf));
663        resultSz = uregex_group(re, 0, buf, 5, &status);
664        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
665        TEST_ASSERT_STRING("abc i", buf, FALSE);
666        TEST_ASSERT(buf[5] == (UChar)0xffff);
667        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
668
669        /* Output string just fits buffer, no NUL term. */
670        status = U_ZERO_ERROR;
671        resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
672        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
673        TEST_ASSERT_STRING("abc interior def", buf, FALSE);
674        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
675        TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
676
677        uregex_close(re);
678
679    }
680
681    /*
682     *  Regions
683     */
684
685
686        /* SetRegion(), getRegion() do something  */
687        TEST_SETUP(".*", "0123456789ABCDEF", 0)
688        UChar resultString[40];
689        TEST_ASSERT(uregex_regionStart(re, &status) == 0);
690        TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
691        uregex_setRegion(re, 3, 6, &status);
692        TEST_ASSERT(uregex_regionStart(re, &status) == 3);
693        TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
694        TEST_ASSERT(uregex_findNext(re, &status));
695        TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
696        TEST_ASSERT_STRING("345", resultString, TRUE);
697        TEST_TEARDOWN;
698
699        /* find(start=-1) uses regions   */
700        TEST_SETUP(".*", "0123456789ABCDEF", 0);
701        uregex_setRegion(re, 4, 6, &status);
702        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
703        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
704        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
705        TEST_TEARDOWN;
706
707        /* find (start >=0) does not use regions   */
708        TEST_SETUP(".*", "0123456789ABCDEF", 0);
709        uregex_setRegion(re, 4, 6, &status);
710        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
711        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
712        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
713        TEST_TEARDOWN;
714
715        /* findNext() obeys regions    */
716        TEST_SETUP(".", "0123456789ABCDEF", 0);
717        uregex_setRegion(re, 4, 6, &status);
718        TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
719        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
720        TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
721        TEST_ASSERT(uregex_start(re, 0, &status) == 5);
722        TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
723        TEST_TEARDOWN;
724
725        /* matches(start=-1) uses regions                                           */
726        /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
727        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
728        uregex_setRegion(re, 4, 6, &status);
729        TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
730        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
731        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
732        TEST_TEARDOWN;
733
734        /* matches (start >=0) does not use regions       */
735        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
736        uregex_setRegion(re, 4, 6, &status);
737        TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
738        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
739        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
740        TEST_TEARDOWN;
741
742        /* lookingAt(start=-1) uses regions                                         */
743        /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
744        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
745        uregex_setRegion(re, 4, 6, &status);
746        TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
747        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
748        TEST_ASSERT(uregex_end(re, 0, &status) == 4);
749        TEST_TEARDOWN;
750
751        /* lookingAt (start >=0) does not use regions  */
752        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
753        uregex_setRegion(re, 4, 6, &status);
754        TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
755        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
756        TEST_ASSERT(uregex_end(re, 0, &status) == 0);
757        TEST_TEARDOWN;
758
759        /* hitEnd()       */
760        TEST_SETUP("[a-f]*", "abcdefghij", 0);
761        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
762        TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
763        TEST_TEARDOWN;
764
765        TEST_SETUP("[a-f]*", "abcdef", 0);
766        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
767        TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
768        TEST_TEARDOWN;
769
770        /* requireEnd   */
771        TEST_SETUP("abcd", "abcd", 0);
772        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
773        TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
774        TEST_TEARDOWN;
775
776        TEST_SETUP("abcd$", "abcd", 0);
777        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
778        TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
779        TEST_TEARDOWN;
780
781        /* anchoringBounds        */
782        TEST_SETUP("abc$", "abcdef", 0);
783        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
784        uregex_useAnchoringBounds(re, FALSE, &status);
785        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
786
787        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
788        uregex_useAnchoringBounds(re, TRUE, &status);
789        uregex_setRegion(re, 0, 3, &status);
790        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
791        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
792        TEST_TEARDOWN;
793
794        /* Transparent Bounds      */
795        TEST_SETUP("abc(?=def)", "abcdef", 0);
796        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
797        uregex_useTransparentBounds(re, TRUE, &status);
798        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
799
800        uregex_useTransparentBounds(re, FALSE, &status);
801        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
802        uregex_setRegion(re, 0, 3, &status);
803        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
804        uregex_useTransparentBounds(re, TRUE, &status);
805        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
806        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
807        TEST_TEARDOWN;
808
809
810    /*
811     *  replaceFirst()
812     */
813    {
814        UChar    text1[80];
815        UChar    text2[80];
816        UChar    replText[80];
817        UChar    buf[80];
818        int32_t  resultSz;
819        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
820        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
821        u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
822
823        status = U_ZERO_ERROR;
824        re = uregex_openC("x(.*?)x", 0, NULL, &status);
825        TEST_ASSERT_SUCCESS(status);
826
827        /*  Normal case, with match */
828        uregex_setText(re, text1, -1, &status);
829        resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
830        TEST_ASSERT_SUCCESS(status);
831        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
832        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
833
834        /* No match.  Text should copy to output with no changes.  */
835        status = U_ZERO_ERROR;
836        uregex_setText(re, text2, -1, &status);
837        resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
838        TEST_ASSERT_SUCCESS(status);
839        TEST_ASSERT_STRING("No match here.", buf, TRUE);
840        TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
841
842        /*  Match, output just fills buffer, no termination warning. */
843        status = U_ZERO_ERROR;
844        uregex_setText(re, text1, -1, &status);
845        memset(buf, -1, sizeof(buf));
846        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
847        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
848        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
849        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
850        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
851
852        /* Do the replaceFirst again, without first resetting anything.
853         *  Should give the same results.
854         */
855        status = U_ZERO_ERROR;
856        memset(buf, -1, sizeof(buf));
857        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
858        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
859        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
860        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
861        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
862
863        /* NULL buffer, zero buffer length */
864        status = U_ZERO_ERROR;
865        resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
866        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
867        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
868
869        /* Buffer too small by one */
870        status = U_ZERO_ERROR;
871        memset(buf, -1, sizeof(buf));
872        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
873        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
874        TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
875        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
876        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
877
878        uregex_close(re);
879    }
880
881
882    /*
883     *  replaceAll()
884     */
885    {
886        UChar    text1[80];          /*  "Replace xaax x1x x...x." */
887        UChar    text2[80];          /*  "No match Here"           */
888        UChar    replText[80];       /*  "<$1>"                    */
889        UChar    replText2[80];      /*  "<<$1>>"                  */
890        const char * pattern = "x(.*?)x";
891        const char * expectedResult = "Replace <aa> <1> <...>.";
892        const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
893        UChar    buf[80];
894        int32_t  resultSize;
895        int32_t  expectedResultSize;
896        int32_t  expectedResultSize2;
897        int32_t  i;
898
899        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
900        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
901        u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
902        u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
903        expectedResultSize = strlen(expectedResult);
904        expectedResultSize2 = strlen(expectedResult2);
905
906        status = U_ZERO_ERROR;
907        re = uregex_openC(pattern, 0, NULL, &status);
908        TEST_ASSERT_SUCCESS(status);
909
910        /*  Normal case, with match */
911        uregex_setText(re, text1, -1, &status);
912        resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
913        TEST_ASSERT_SUCCESS(status);
914        TEST_ASSERT_STRING(expectedResult, buf, TRUE);
915        TEST_ASSERT(resultSize == expectedResultSize);
916
917        /* No match.  Text should copy to output with no changes.  */
918        status = U_ZERO_ERROR;
919        uregex_setText(re, text2, -1, &status);
920        resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
921        TEST_ASSERT_SUCCESS(status);
922        TEST_ASSERT_STRING("No match here.", buf, TRUE);
923        TEST_ASSERT(resultSize == u_strlen(text2));
924
        /*  Match, output exactly fills the buffer; expect the string-not-terminated warning. */
926        status = U_ZERO_ERROR;
927        uregex_setText(re, text1, -1, &status);
928        memset(buf, -1, sizeof(buf));
929        resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
930        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
931        TEST_ASSERT_STRING(expectedResult, buf, FALSE);
932        TEST_ASSERT(resultSize == expectedResultSize);
933        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
934
        /* Do the replaceAll again, without first resetting anything.
         *  Should give the same results.
         */
938        status = U_ZERO_ERROR;
939        memset(buf, -1, sizeof(buf));
940        resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
941        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
942        TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
943        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
944        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
945
946        /* NULL buffer, zero buffer length */
947        status = U_ZERO_ERROR;
948        resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
949        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
950        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
951
952        /* Buffer too small.  Try every size, which will tickle edge cases
953         * in uregex_appendReplacement (used by replaceAll)   */
954        for (i=0; i<expectedResultSize; i++) {
955            char  expected[80];
956            status = U_ZERO_ERROR;
957            memset(buf, -1, sizeof(buf));
958            resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
959            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
960            strcpy(expected, expectedResult);
961            expected[i] = 0;
962            TEST_ASSERT_STRING(expected, buf, FALSE);
963            TEST_ASSERT(resultSize == expectedResultSize);
964            TEST_ASSERT(buf[i] == (UChar)0xffff);
965        }
966
967        /* Buffer too small.  Same as previous test, except this time the replacement
968         * text is longer than the match capture group, making the length of the complete
969         * replacement longer than the original string.
970         */
971        for (i=0; i<expectedResultSize2; i++) {
972            char  expected[80];
973            status = U_ZERO_ERROR;
974            memset(buf, -1, sizeof(buf));
975            resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
976            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
977            strcpy(expected, expectedResult2);
978            expected[i] = 0;
979            TEST_ASSERT_STRING(expected, buf, FALSE);
980            TEST_ASSERT(resultSize == expectedResultSize2);
981            TEST_ASSERT(buf[i] == (UChar)0xffff);
982        }
983
984
985        uregex_close(re);
986    }
987
988
989    /*
990     *  appendReplacement()
991     */
992    {
993        UChar    text[100];
994        UChar    repl[100];
995        UChar    buf[100];
996        UChar   *bufPtr;
997        int32_t  bufCap;
998
999
1000        status = U_ZERO_ERROR;
1001        re = uregex_openC(".*", 0, 0, &status);
1002        TEST_ASSERT_SUCCESS(status);
1003
1004        u_uastrncpy(text, "whatever",  sizeof(text)/2);
1005        u_uastrncpy(repl, "some other", sizeof(repl)/2);
1006        uregex_setText(re, text, -1, &status);
1007
1008        /* match covers whole target string */
1009        uregex_find(re, 0, &status);
1010        TEST_ASSERT_SUCCESS(status);
1011        bufPtr = buf;
1012        bufCap = sizeof(buf) / 2;
1013        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1014        TEST_ASSERT_SUCCESS(status);
1015        TEST_ASSERT_STRING("some other", buf, TRUE);
1016
1017        /* Match has \u \U escapes */
1018        uregex_find(re, 0, &status);
1019        TEST_ASSERT_SUCCESS(status);
1020        bufPtr = buf;
1021        bufCap = sizeof(buf) / 2;
1022        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1023        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1024        TEST_ASSERT_SUCCESS(status);
1025        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1026
1027        /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1028        status = U_ZERO_ERROR;
1029        uregex_find(re, 0, &status);
1030        TEST_ASSERT_SUCCESS(status);
1031        bufPtr = buf;
1032        status = U_BUFFER_OVERFLOW_ERROR;
1033        uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1034        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1035
1036        uregex_close(re);
1037    }
1038
1039
1040    /*
1041     *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1042     */
1043
1044    /*
1045     *  split()
1046     */
1047    {
1048        UChar    textToSplit[80];
1049        UChar    text2[80];
1050        UChar    buf[200];
1051        UChar    *fields[10];
1052        int32_t  numFields;
1053        int32_t  requiredCapacity;
1054        int32_t  spaceNeeded;
1055        int32_t  sz;
1056
1057        u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1058        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1059
1060        status = U_ZERO_ERROR;
1061        re = uregex_openC(":", 0, NULL, &status);
1062
1063
1064        /*  Simple split */
1065
1066        uregex_setText(re, textToSplit, -1, &status);
1067        TEST_ASSERT_SUCCESS(status);
1068
1069        /* The TEST_ASSERT_SUCCESS call above should change too... */
1070        if (U_SUCCESS(status)) {
1071            memset(fields, -1, sizeof(fields));
1072            numFields =
1073                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1074            TEST_ASSERT_SUCCESS(status);
1075
1076            /* The TEST_ASSERT_SUCCESS call above should change too... */
1077            if(U_SUCCESS(status)) {
1078                TEST_ASSERT(numFields == 3);
1079                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1080                TEST_ASSERT_STRING(" second", fields[1], TRUE);
1081                TEST_ASSERT_STRING("  third", fields[2], TRUE);
1082                TEST_ASSERT(fields[3] == NULL);
1083
1084                spaceNeeded = u_strlen(textToSplit) -
1085                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1086                            numFields;          /* Each field gets a NUL terminator */
1087
1088                TEST_ASSERT(spaceNeeded == requiredCapacity);
1089            }
1090        }
1091
1092        uregex_close(re);
1093
1094
1095        /*  Split with too few output strings available */
1096        status = U_ZERO_ERROR;
1097        re = uregex_openC(":", 0, NULL, &status);
1098        uregex_setText(re, textToSplit, -1, &status);
1099        TEST_ASSERT_SUCCESS(status);
1100
1101        /* The TEST_ASSERT_SUCCESS call above should change too... */
1102        if(U_SUCCESS(status)) {
1103            memset(fields, -1, sizeof(fields));
1104            numFields =
1105                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1106            TEST_ASSERT_SUCCESS(status);
1107
1108            /* The TEST_ASSERT_SUCCESS call above should change too... */
1109            if(U_SUCCESS(status)) {
1110                TEST_ASSERT(numFields == 2);
1111                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1112                TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1113                TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1114
1115                spaceNeeded = u_strlen(textToSplit) -
1116                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1117                            numFields;          /* Each field gets a NUL terminator */
1118
1119                TEST_ASSERT(spaceNeeded == requiredCapacity);
1120
1121                /* Split with a range of output buffer sizes.  */
1122                spaceNeeded = u_strlen(textToSplit) -
1123                    (numFields - 1)  +  /* Field delimiters do not appear in output */
1124                    numFields;          /* Each field gets a NUL terminator */
1125
1126                for (sz=0; sz < spaceNeeded+1; sz++) {
1127                    memset(fields, -1, sizeof(fields));
1128                    status = U_ZERO_ERROR;
1129                    numFields =
1130                        uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1131                    if (sz >= spaceNeeded) {
1132                        TEST_ASSERT_SUCCESS(status);
1133                        TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1134                        TEST_ASSERT_STRING(" second", fields[1], TRUE);
1135                        TEST_ASSERT_STRING("  third", fields[2], TRUE);
1136                    } else {
1137                        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1138                    }
1139                    TEST_ASSERT(numFields == 3);
1140                    TEST_ASSERT(fields[3] == NULL);
1141                    TEST_ASSERT(spaceNeeded == requiredCapacity);
1142                }
1143            }
1144        }
1145
1146        uregex_close(re);
1147    }
1148
1149
1150
1151
1152    /* Split(), part 2.  Patterns with capture groups.  The capture group text
1153     *                   comes out as additional fields.  */
1154    {
1155        UChar    textToSplit[80];
1156        UChar    buf[200];
1157        UChar    *fields[10];
1158        int32_t  numFields;
1159        int32_t  requiredCapacity;
1160        int32_t  spaceNeeded;
1161        int32_t  sz;
1162
1163        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
1164
1165        status = U_ZERO_ERROR;
1166        re = uregex_openC("<(.*?)>", 0, NULL, &status);
1167
1168        uregex_setText(re, textToSplit, -1, &status);
1169        TEST_ASSERT_SUCCESS(status);
1170
1171        /* The TEST_ASSERT_SUCCESS call above should change too... */
1172        if(U_SUCCESS(status)) {
1173            memset(fields, -1, sizeof(fields));
1174            numFields =
1175                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1176            TEST_ASSERT_SUCCESS(status);
1177
1178            /* The TEST_ASSERT_SUCCESS call above should change too... */
1179            if(U_SUCCESS(status)) {
1180                TEST_ASSERT(numFields == 5);
1181                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1182                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1183                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1184                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1185                TEST_ASSERT_STRING("  third", fields[4], TRUE);
1186                TEST_ASSERT(fields[5] == NULL);
1187                spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1188                TEST_ASSERT(spaceNeeded == requiredCapacity);
1189            }
1190        }
1191
1192        /*  Split with too few output strings available (2) */
1193        status = U_ZERO_ERROR;
1194        memset(fields, -1, sizeof(fields));
1195        numFields =
1196            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1197        TEST_ASSERT_SUCCESS(status);
1198
1199        /* The TEST_ASSERT_SUCCESS call above should change too... */
1200        if(U_SUCCESS(status)) {
1201            TEST_ASSERT(numFields == 2);
1202            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1203            TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1204            TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1205
1206            spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1207            TEST_ASSERT(spaceNeeded == requiredCapacity);
1208        }
1209
1210        /*  Split with too few output strings available (3) */
1211        status = U_ZERO_ERROR;
1212        memset(fields, -1, sizeof(fields));
1213        numFields =
1214            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
1215        TEST_ASSERT_SUCCESS(status);
1216
1217        /* The TEST_ASSERT_SUCCESS call above should change too... */
1218        if(U_SUCCESS(status)) {
1219            TEST_ASSERT(numFields == 3);
1220            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1221            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1222            TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1223            TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1224
1225            spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1226            TEST_ASSERT(spaceNeeded == requiredCapacity);
1227        }
1228
1229        /*  Split with just enough output strings available (5) */
1230        status = U_ZERO_ERROR;
1231        memset(fields, -1, sizeof(fields));
1232        numFields =
1233            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
1234        TEST_ASSERT_SUCCESS(status);
1235
1236        /* The TEST_ASSERT_SUCCESS call above should change too... */
1237        if(U_SUCCESS(status)) {
1238            TEST_ASSERT(numFields == 5);
1239            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1240            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1241            TEST_ASSERT_STRING(" second", fields[2], TRUE);
1242            TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1243            TEST_ASSERT_STRING("  third", fields[4], TRUE);
1244            TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1245
1246            spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1247            TEST_ASSERT(spaceNeeded == requiredCapacity);
1248        }
1249
1250        /* Split, end of text is a field delimiter.   */
1251        status = U_ZERO_ERROR;
1252        sz = strlen("first <tag-a> second<tag-b>");
1253        uregex_setText(re, textToSplit, sz, &status);
1254        TEST_ASSERT_SUCCESS(status);
1255
1256        /* The TEST_ASSERT_SUCCESS call above should change too... */
1257        if(U_SUCCESS(status)) {
1258            memset(fields, -1, sizeof(fields));
1259            numFields =
1260                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
1261            TEST_ASSERT_SUCCESS(status);
1262
1263            /* The TEST_ASSERT_SUCCESS call above should change too... */
1264            if(U_SUCCESS(status)) {
1265                TEST_ASSERT(numFields == 5);
1266                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1267                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1268                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1269                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1270                TEST_ASSERT_STRING("",        fields[4], TRUE);
1271                TEST_ASSERT(fields[5] == NULL);
1272                TEST_ASSERT(fields[8] == NULL);
1273                TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1274                spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1275                TEST_ASSERT(spaceNeeded == requiredCapacity);
1276            }
1277        }
1278
1279        uregex_close(re);
1280    }
1281
1282    /*
1283     * set/getTimeLimit
1284     */
1285     TEST_SETUP("abc$", "abcdef", 0);
1286     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1287     uregex_setTimeLimit(re, 1000, &status);
1288     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1289     TEST_ASSERT_SUCCESS(status);
1290     uregex_setTimeLimit(re, -1, &status);
1291     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1292     status = U_ZERO_ERROR;
1293     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1294     TEST_TEARDOWN;
1295
1296     /*
1297      * set/get Stack Limit
1298      */
1299     TEST_SETUP("abc$", "abcdef", 0);
1300     TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1301     uregex_setStackLimit(re, 40000, &status);
1302     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1303     TEST_ASSERT_SUCCESS(status);
1304     uregex_setStackLimit(re, -1, &status);
1305     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1306     status = U_ZERO_ERROR;
1307     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1308     TEST_TEARDOWN;
1309
1310
1311     /*
1312      * Get/Set callback functions
1313      *     This test is copied from intltest regex/Callbacks
1314      *     The pattern and test data will run long enough to cause the callback
1315      *       to be invoked.  The nested '+' operators give exponential time
1316      *       behavior with increasing string length.
1317      */
1318     TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1319     callBackContext cbInfo = {4, 0, 0};
1320     const void     *pContext   = &cbInfo;
1321     URegexMatchCallback    *returnedFn = &TestCallbackFn;
1322
1323     /*  Getting the callback fn when it hasn't been set must return NULL  */
1324     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1325     TEST_ASSERT_SUCCESS(status);
1326     TEST_ASSERT(returnedFn == NULL);
1327     TEST_ASSERT(pContext == NULL);
1328
     /* Set the callback and do a match.                                  */
1330     /* The callback function should record that it has been called.      */
1331     uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1332     TEST_ASSERT_SUCCESS(status);
1333     TEST_ASSERT(cbInfo.numCalls == 0);
1334     TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1335     TEST_ASSERT_SUCCESS(status);
1336     TEST_ASSERT(cbInfo.numCalls > 0);
1337
1338     /* Getting the callback should return the values that were set above.  */
1339     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1340     TEST_ASSERT(returnedFn == &TestCallbackFn);
1341     TEST_ASSERT(pContext == &cbInfo);
1342
1343     TEST_TEARDOWN;
1344}
1345
1346
1347
1348static void TestBug4315(void) {
1349    UErrorCode      theICUError = U_ZERO_ERROR;
1350    URegularExpression *theRegEx;
1351    UChar           *textBuff;
1352    const char      *thePattern;
1353    UChar            theString[100];
1354    UChar           *destFields[24];
1355    int32_t         neededLength1;
1356    int32_t         neededLength2;
1357
1358    int32_t         wordCount = 0;
1359    int32_t         destFieldsSize = 24;
1360
1361    thePattern  = "ck ";
1362    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1363
1364    /* open a regex */
1365    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1366    TEST_ASSERT_SUCCESS(theICUError);
1367
1368    /* set the input string */
1369    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1370    TEST_ASSERT_SUCCESS(theICUError);
1371
1372    /* split */
1373    /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1374     *  error occurs! */
1375    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1376        destFieldsSize, &theICUError);
1377
1378    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1379    TEST_ASSERT(wordCount==3);
1380
1381    if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1382    {
1383        theICUError = U_ZERO_ERROR;
1384        textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1385        wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1386            destFields, destFieldsSize, &theICUError);
1387        TEST_ASSERT(wordCount==3);
1388        TEST_ASSERT_SUCCESS(theICUError);
1389        TEST_ASSERT(neededLength1 == neededLength2);
1390        TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1391        TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1392        TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1393        TEST_ASSERT(destFields[3] == NULL);
1394        free(textBuff);
1395    }
1396    uregex_close(theRegEx);
1397}
1398
1399/* Based on TestRegexCAPI() */
1400static void TestUTextAPI(void) {
1401    UErrorCode           status = U_ZERO_ERROR;
1402    URegularExpression  *re;
1403    UText                patternText = UTEXT_INITIALIZER;
1404    UChar                pat[200];
1405    const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1406
    /* Minimalist open/close */
1408    utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1409    re = uregex_openUText(&patternText, 0, 0, &status);
1410    if (U_FAILURE(status)) {
1411         log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1412         utext_close(&patternText);
1413         return;
1414    }
1415    uregex_close(re);
1416
1417    /* Open with all flag values set */
1418    status = U_ZERO_ERROR;
1419    re = uregex_openUText(&patternText,
1420        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1421        0, &status);
1422    TEST_ASSERT_SUCCESS(status);
1423    uregex_close(re);
1424
1425    /* Open with an invalid flag */
1426    status = U_ZERO_ERROR;
1427    re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1428    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1429    uregex_close(re);
1430
1431    /* open with an invalid parameter */
1432    status = U_ZERO_ERROR;
1433    re = uregex_openUText(NULL,
1434        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1435    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1436
1437    /*
1438     *  clone
1439     */
1440    {
1441        URegularExpression *clone1;
1442        URegularExpression *clone2;
1443        URegularExpression *clone3;
1444        UChar  testString1[30];
1445        UChar  testString2[30];
1446        UBool  result;
1447
1448
1449        status = U_ZERO_ERROR;
1450        re = uregex_openUText(&patternText, 0, 0, &status);
1451        TEST_ASSERT_SUCCESS(status);
1452        clone1 = uregex_clone(re, &status);
1453        TEST_ASSERT_SUCCESS(status);
1454        TEST_ASSERT(clone1 != NULL);
1455
1456        status = U_ZERO_ERROR;
1457        clone2 = uregex_clone(re, &status);
1458        TEST_ASSERT_SUCCESS(status);
1459        TEST_ASSERT(clone2 != NULL);
1460        uregex_close(re);
1461
1462        status = U_ZERO_ERROR;
1463        clone3 = uregex_clone(clone2, &status);
1464        TEST_ASSERT_SUCCESS(status);
1465        TEST_ASSERT(clone3 != NULL);
1466
1467        u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
1468        u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
1469
1470        status = U_ZERO_ERROR;
1471        uregex_setText(clone1, testString1, -1, &status);
1472        TEST_ASSERT_SUCCESS(status);
1473        result = uregex_lookingAt(clone1, 0, &status);
1474        TEST_ASSERT_SUCCESS(status);
1475        TEST_ASSERT(result==TRUE);
1476
1477        status = U_ZERO_ERROR;
1478        uregex_setText(clone2, testString2, -1, &status);
1479        TEST_ASSERT_SUCCESS(status);
1480        result = uregex_lookingAt(clone2, 0, &status);
1481        TEST_ASSERT_SUCCESS(status);
1482        TEST_ASSERT(result==FALSE);
1483        result = uregex_find(clone2, 0, &status);
1484        TEST_ASSERT_SUCCESS(status);
1485        TEST_ASSERT(result==TRUE);
1486
1487        uregex_close(clone1);
1488        uregex_close(clone2);
1489        uregex_close(clone3);
1490
1491    }
1492
1493    /*
1494     *  pattern() and patternText()
1495     */
1496    {
1497        const UChar  *resultPat;
1498        int32_t       resultLen;
1499        UText        *resultText;
1500        const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1501        const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1502        u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
1503        status = U_ZERO_ERROR;
1504
1505        utext_openUTF8(&patternText, str_hello, -1, &status);
1506        re = uregex_open(pat, -1, 0, NULL, &status);
1507        resultPat = uregex_pattern(re, &resultLen, &status);
1508        TEST_ASSERT_SUCCESS(status);
1509
1510        /* The TEST_ASSERT_SUCCESS above should change too... */
1511        if (U_SUCCESS(status)) {
1512            TEST_ASSERT(resultLen == -1);
1513            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1514        }
1515
1516        resultText = uregex_patternUText(re, &status);
1517        TEST_ASSERT_SUCCESS(status);
1518        TEST_ASSERT_UTEXT(str_hello, resultText);
1519
1520        uregex_close(re);
1521
1522        status = U_ZERO_ERROR;
1523        re = uregex_open(pat, 3, 0, NULL, &status);
1524        resultPat = uregex_pattern(re, &resultLen, &status);
1525        TEST_ASSERT_SUCCESS(status);
1526
1527        /* The TEST_ASSERT_SUCCESS above should change too... */
1528        if (U_SUCCESS(status)) {
1529            TEST_ASSERT(resultLen == 3);
1530            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1531            TEST_ASSERT(u_strlen(resultPat) == 3);
1532        }
1533
1534        resultText = uregex_patternUText(re, &status);
1535        TEST_ASSERT_SUCCESS(status);
1536        TEST_ASSERT_UTEXT(str_hel, resultText);
1537
1538        uregex_close(re);
1539    }
1540
1541    /*
1542     *  setUText() and lookingAt()
1543     */
1544    {
1545        UText  text1 = UTEXT_INITIALIZER;
1546        UText  text2 = UTEXT_INITIALIZER;
1547        UBool  result;
1548        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1549        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1550        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1551        status = U_ZERO_ERROR;
1552        utext_openUTF8(&text1, str_abcccd, -1, &status);
1553        utext_openUTF8(&text2, str_abcccxd, -1, &status);
1554
1555        utext_openUTF8(&patternText, str_abcd, -1, &status);
1556        re = uregex_openUText(&patternText, 0, NULL, &status);
1557        TEST_ASSERT_SUCCESS(status);
1558
1559        /* Operation before doing a setText should fail... */
1560        status = U_ZERO_ERROR;
1561        uregex_lookingAt(re, 0, &status);
1562        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1563
1564        status = U_ZERO_ERROR;
1565        uregex_setUText(re, &text1, &status);
1566        result = uregex_lookingAt(re, 0, &status);
1567        TEST_ASSERT(result == TRUE);
1568        TEST_ASSERT_SUCCESS(status);
1569
1570        status = U_ZERO_ERROR;
1571        uregex_setUText(re, &text2, &status);
1572        result = uregex_lookingAt(re, 0, &status);
1573        TEST_ASSERT(result == FALSE);
1574        TEST_ASSERT_SUCCESS(status);
1575
1576        status = U_ZERO_ERROR;
1577        uregex_setUText(re, &text1, &status);
1578        result = uregex_lookingAt(re, 0, &status);
1579        TEST_ASSERT(result == TRUE);
1580        TEST_ASSERT_SUCCESS(status);
1581
1582        uregex_close(re);
1583        utext_close(&text1);
1584        utext_close(&text2);
1585    }
1586
1587
1588    /*
1589     *  getText() and getUText()
1590     */
1591    {
1592        UText  text1 = UTEXT_INITIALIZER;
1593        UText  text2 = UTEXT_INITIALIZER;
1594        UChar  text2Chars[20];
1595        UText  *resultText;
1596        const UChar   *result;
1597        int32_t  textLength;
1598        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1599        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1600        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1601
1602
1603        status = U_ZERO_ERROR;
1604        utext_openUTF8(&text1, str_abcccd, -1, &status);
1605        u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
1606        utext_openUChars(&text2, text2Chars, -1, &status);
1607
1608        utext_openUTF8(&patternText, str_abcd, -1, &status);
1609        re = uregex_openUText(&patternText, 0, NULL, &status);
1610
1611        /* First set a UText */
1612        uregex_setUText(re, &text1, &status);
1613        resultText = uregex_getUText(re, NULL, &status);
1614        TEST_ASSERT_SUCCESS(status);
1615        TEST_ASSERT(resultText != &text1);
1616        utext_setNativeIndex(resultText, 0);
1617        utext_setNativeIndex(&text1, 0);
1618        TEST_ASSERT(testUTextEqual(resultText, &text1));
1619        utext_close(resultText);
1620
1621        result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1622        (void)result;    /* Suppress set but not used warning. */
1623        TEST_ASSERT(textLength == -1 || textLength == 6);
1624        resultText = uregex_getUText(re, NULL, &status);
1625        TEST_ASSERT_SUCCESS(status);
1626        TEST_ASSERT(resultText != &text1);
1627        utext_setNativeIndex(resultText, 0);
1628        utext_setNativeIndex(&text1, 0);
1629        TEST_ASSERT(testUTextEqual(resultText, &text1));
1630        utext_close(resultText);
1631
1632        /* Then set a UChar * */
1633        uregex_setText(re, text2Chars, 7, &status);
1634        resultText = uregex_getUText(re, NULL, &status);
1635        TEST_ASSERT_SUCCESS(status);
1636        utext_setNativeIndex(resultText, 0);
1637        utext_setNativeIndex(&text2, 0);
1638        TEST_ASSERT(testUTextEqual(resultText, &text2));
1639        utext_close(resultText);
1640        result = uregex_getText(re, &textLength, &status);
1641        TEST_ASSERT(textLength == 7);
1642
1643        uregex_close(re);
1644        utext_close(&text1);
1645        utext_close(&text2);
1646    }
1647
1648    /*
1649     *  matches()
1650     */
1651    {
1652        UText   text1 = UTEXT_INITIALIZER;
1653        UBool   result;
1654        UText   nullText = UTEXT_INITIALIZER;
1655        const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1656        const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1657
1658        status = U_ZERO_ERROR;
1659        utext_openUTF8(&text1, str_abcccde, -1, &status);
1660        utext_openUTF8(&patternText, str_abcd, -1, &status);
1661        re = uregex_openUText(&patternText, 0, NULL, &status);
1662
1663        uregex_setUText(re, &text1, &status);
1664        result = uregex_matches(re, 0, &status);
1665        TEST_ASSERT(result == FALSE);
1666        TEST_ASSERT_SUCCESS(status);
1667        uregex_close(re);
1668
1669        status = U_ZERO_ERROR;
1670        re = uregex_openC(".?", 0, NULL, &status);
1671        uregex_setUText(re, &text1, &status);
1672        result = uregex_matches(re, 7, &status);
1673        TEST_ASSERT(result == TRUE);
1674        TEST_ASSERT_SUCCESS(status);
1675
1676        status = U_ZERO_ERROR;
1677        utext_openUTF8(&nullText, "", -1, &status);
1678        uregex_setUText(re, &nullText, &status);
1679        TEST_ASSERT_SUCCESS(status);
1680        result = uregex_matches(re, 0, &status);
1681        TEST_ASSERT(result == TRUE);
1682        TEST_ASSERT_SUCCESS(status);
1683
1684        uregex_close(re);
1685        utext_close(&text1);
1686        utext_close(&nullText);
1687    }
1688
1689
1690    /*
1691     *  lookingAt()    Used in setText test.
1692     */
1693
1694
1695    /*
1696     *  find(), findNext, start, end, reset
1697     */
1698    {
1699        UChar    text1[50];
1700        UBool    result;
1701        u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
1702        status = U_ZERO_ERROR;
1703        re = uregex_openC("rx", 0, NULL, &status);
1704
1705        uregex_setText(re, text1, -1, &status);
1706        result = uregex_find(re, 0, &status);
1707        TEST_ASSERT(result == TRUE);
1708        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1709        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1710        TEST_ASSERT_SUCCESS(status);
1711
1712        result = uregex_find(re, 9, &status);
1713        TEST_ASSERT(result == TRUE);
1714        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1715        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1716        TEST_ASSERT_SUCCESS(status);
1717
1718        result = uregex_find(re, 14, &status);
1719        TEST_ASSERT(result == FALSE);
1720        TEST_ASSERT_SUCCESS(status);
1721
1722        status = U_ZERO_ERROR;
1723        uregex_reset(re, 0, &status);
1724
1725        result = uregex_findNext(re, &status);
1726        TEST_ASSERT(result == TRUE);
1727        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1728        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1729        TEST_ASSERT_SUCCESS(status);
1730
1731        result = uregex_findNext(re, &status);
1732        TEST_ASSERT(result == TRUE);
1733        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1734        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1735        TEST_ASSERT_SUCCESS(status);
1736
1737        status = U_ZERO_ERROR;
1738        uregex_reset(re, 12, &status);
1739
1740        result = uregex_findNext(re, &status);
1741        TEST_ASSERT(result == TRUE);
1742        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1743        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1744        TEST_ASSERT_SUCCESS(status);
1745
1746        result = uregex_findNext(re, &status);
1747        TEST_ASSERT(result == FALSE);
1748        TEST_ASSERT_SUCCESS(status);
1749
1750        uregex_close(re);
1751    }
1752
1753    /*
1754     *  group()
1755     */
1756    {
1757        UChar    text1[80];
1758        UText   *actual;
1759        UBool    result;
1760
1761        const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
1762        const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
1763
1764
1765        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
1766
1767        status = U_ZERO_ERROR;
1768        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1769        TEST_ASSERT_SUCCESS(status);
1770
1771        uregex_setText(re, text1, -1, &status);
1772        result = uregex_find(re, 0, &status);
1773        TEST_ASSERT(result==TRUE);
1774
1775        /*  Capture Group 0, the full match.  Should succeed.  */
1776        status = U_ZERO_ERROR;
1777        actual = uregex_groupUTextDeep(re, 0, NULL, &status);
1778        TEST_ASSERT_SUCCESS(status);
1779        TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
1780        utext_close(actual);
1781
1782        /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1783        status = U_ZERO_ERROR;
1784        {
1785            int64_t      group_len;
1786            int32_t      len16;
1787            UErrorCode   shallowStatus = U_ZERO_ERROR;
1788            int64_t      nativeIndex;
1789            UChar *groupChars;
1790            UText groupText = UTEXT_INITIALIZER;
1791
1792            actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
1793            TEST_ASSERT_SUCCESS(status);
1794
1795            nativeIndex = utext_getNativeIndex(actual);
1796            /*  Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp]  */
1797            /*  len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus);  */
1798            len16 = (int32_t)group_len;
1799
1800            groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
1801            utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
1802
1803            utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
1804
1805            TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
1806            utext_close(&groupText);
1807            free(groupChars);
1808        }
1809        utext_close(actual);
1810
1811        /*  Capture group #1.  Should succeed. */
1812        status = U_ZERO_ERROR;
1813        actual = uregex_groupUTextDeep(re, 1, NULL, &status);
1814        TEST_ASSERT_SUCCESS(status);
1815        TEST_ASSERT_UTEXT(str_interior, actual);
1816        utext_close(actual);
1817
1818        /*  Capture group out of range.  Error. */
1819        status = U_ZERO_ERROR;
1820        actual = uregex_groupUTextDeep(re, 2, NULL, &status);
1821        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1822        TEST_ASSERT(utext_nativeLength(actual) == 0);
1823        utext_close(actual);
1824
1825        uregex_close(re);
1826
1827    }
1828
1829    /*
1830     *  replaceFirst()
1831     */
1832    {
1833        UChar    text1[80];
1834        UChar    text2[80];
1835        UText    replText = UTEXT_INITIALIZER;
1836        UText   *result;
1837        const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1838        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1839        const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
1840        const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1841        const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1842        status = U_ZERO_ERROR;
1843        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1844        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1845        utext_openUTF8(&replText, str_1x, -1, &status);
1846
1847        re = uregex_openC("x(.*?)x", 0, NULL, &status);
1848        TEST_ASSERT_SUCCESS(status);
1849
1850        /*  Normal case, with match */
1851        uregex_setText(re, text1, -1, &status);
1852        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1853        TEST_ASSERT_SUCCESS(status);
1854        TEST_ASSERT_UTEXT(str_Replxxx, result);
1855        utext_close(result);
1856
1857        /* No match.  Text should copy to output with no changes.  */
1858        uregex_setText(re, text2, -1, &status);
1859        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1860        TEST_ASSERT_SUCCESS(status);
1861        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1862        utext_close(result);
1863
1864        /* Unicode escapes */
1865        uregex_setText(re, text1, -1, &status);
1866        utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1867        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1868        TEST_ASSERT_SUCCESS(status);
1869        TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1870        utext_close(result);
1871
1872        uregex_close(re);
1873        utext_close(&replText);
1874    }
1875
1876
1877    /*
1878     *  replaceAll()
1879     */
1880    {
1881        UChar    text1[80];
1882        UChar    text2[80];
1883        UText    replText = UTEXT_INITIALIZER;
1884        UText   *result;
1885        const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1886        const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1887        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1888        status = U_ZERO_ERROR;
1889        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1890        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1891        utext_openUTF8(&replText, str_1, -1, &status);
1892
1893        re = uregex_openC("x(.*?)x", 0, NULL, &status);
1894        TEST_ASSERT_SUCCESS(status);
1895
1896        /*  Normal case, with match */
1897        uregex_setText(re, text1, -1, &status);
1898        result = uregex_replaceAllUText(re, &replText, NULL, &status);
1899        TEST_ASSERT_SUCCESS(status);
1900        TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1901        utext_close(result);
1902
1903        /* No match.  Text should copy to output with no changes.  */
1904        uregex_setText(re, text2, -1, &status);
1905        result = uregex_replaceAllUText(re, &replText, NULL, &status);
1906        TEST_ASSERT_SUCCESS(status);
1907        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1908        utext_close(result);
1909
1910        uregex_close(re);
1911        utext_close(&replText);
1912    }
1913
1914
1915    /*
1916     *  appendReplacement()
1917     */
1918    {
1919        UChar    text[100];
1920        UChar    repl[100];
1921        UChar    buf[100];
1922        UChar   *bufPtr;
1923        int32_t  bufCap;
1924
1925        status = U_ZERO_ERROR;
1926        re = uregex_openC(".*", 0, 0, &status);
1927        TEST_ASSERT_SUCCESS(status);
1928
1929        u_uastrncpy(text, "whatever",  sizeof(text)/2);
1930        u_uastrncpy(repl, "some other", sizeof(repl)/2);
1931        uregex_setText(re, text, -1, &status);
1932
1933        /* match covers whole target string */
1934        uregex_find(re, 0, &status);
1935        TEST_ASSERT_SUCCESS(status);
1936        bufPtr = buf;
1937        bufCap = sizeof(buf) / 2;
1938        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1939        TEST_ASSERT_SUCCESS(status);
1940        TEST_ASSERT_STRING("some other", buf, TRUE);
1941
1942        /* Match has \u \U escapes */
1943        uregex_find(re, 0, &status);
1944        TEST_ASSERT_SUCCESS(status);
1945        bufPtr = buf;
1946        bufCap = sizeof(buf) / 2;
1947        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1948        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1949        TEST_ASSERT_SUCCESS(status);
1950        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1951
1952        uregex_close(re);
1953    }
1954
1955
1956    /*
1957     *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1958     */
1959
1960    /*
1961     *  splitUText()
1962     */
1963    {
1964        UChar    textToSplit[80];
1965        UChar    text2[80];
1966        UText    *fields[10];
1967        int32_t  numFields;
1968        int32_t i;
1969
1970        u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1971        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1972
1973        status = U_ZERO_ERROR;
1974        re = uregex_openC(":", 0, NULL, &status);
1975
1976
1977        /*  Simple split */
1978
1979        uregex_setText(re, textToSplit, -1, &status);
1980        TEST_ASSERT_SUCCESS(status);
1981
1982        /* The TEST_ASSERT_SUCCESS call above should change too... */
1983        if (U_SUCCESS(status)) {
1984            memset(fields, 0, sizeof(fields));
1985            numFields = uregex_splitUText(re, fields, 10, &status);
1986            TEST_ASSERT_SUCCESS(status);
1987
1988            /* The TEST_ASSERT_SUCCESS call above should change too... */
1989            if(U_SUCCESS(status)) {
1990              const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1991              const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1992              const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1993                TEST_ASSERT(numFields == 3);
1994                TEST_ASSERT_UTEXT(str_first,  fields[0]);
1995                TEST_ASSERT_UTEXT(str_second, fields[1]);
1996                TEST_ASSERT_UTEXT(str_third, fields[2]);
1997                TEST_ASSERT(fields[3] == NULL);
1998            }
1999            for(i = 0; i < numFields; i++) {
2000                utext_close(fields[i]);
2001            }
2002        }
2003
2004        uregex_close(re);
2005
2006
2007        /*  Split with too few output strings available */
2008        status = U_ZERO_ERROR;
2009        re = uregex_openC(":", 0, NULL, &status);
2010        uregex_setText(re, textToSplit, -1, &status);
2011        TEST_ASSERT_SUCCESS(status);
2012
2013        /* The TEST_ASSERT_SUCCESS call above should change too... */
2014        if(U_SUCCESS(status)) {
2015            fields[0] = NULL;
2016            fields[1] = NULL;
2017            fields[2] = &patternText;
2018            numFields = uregex_splitUText(re, fields, 2, &status);
2019            TEST_ASSERT_SUCCESS(status);
2020
2021            /* The TEST_ASSERT_SUCCESS call above should change too... */
2022            if(U_SUCCESS(status)) {
2023                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2024                const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2025                TEST_ASSERT(numFields == 2);
2026                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2027                TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2028                TEST_ASSERT(fields[2] == &patternText);
2029            }
2030            for(i = 0; i < numFields; i++) {
2031                utext_close(fields[i]);
2032            }
2033        }
2034
2035        uregex_close(re);
2036    }
2037
2038    /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2039     *                   comes out as additional fields.  */
2040    {
2041        UChar    textToSplit[80];
2042        UText    *fields[10];
2043        int32_t  numFields;
2044        int32_t i;
2045
2046        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
2047
2048        status = U_ZERO_ERROR;
2049        re = uregex_openC("<(.*?)>", 0, NULL, &status);
2050
2051        uregex_setText(re, textToSplit, -1, &status);
2052        TEST_ASSERT_SUCCESS(status);
2053
2054        /* The TEST_ASSERT_SUCCESS call above should change too... */
2055        if(U_SUCCESS(status)) {
2056            memset(fields, 0, sizeof(fields));
2057            numFields = uregex_splitUText(re, fields, 10, &status);
2058            TEST_ASSERT_SUCCESS(status);
2059
2060            /* The TEST_ASSERT_SUCCESS call above should change too... */
2061            if(U_SUCCESS(status)) {
2062                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2063                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2064                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2065                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2066                const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2067
2068                TEST_ASSERT(numFields == 5);
2069                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2070                TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2071                TEST_ASSERT_UTEXT(str_second, fields[2]);
2072                TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2073                TEST_ASSERT_UTEXT(str_third, fields[4]);
2074                TEST_ASSERT(fields[5] == NULL);
2075            }
2076            for(i = 0; i < numFields; i++) {
2077                utext_close(fields[i]);
2078            }
2079        }
2080
2081        /*  Split with too few output strings available (2) */
2082        status = U_ZERO_ERROR;
2083        fields[0] = NULL;
2084        fields[1] = NULL;
2085        fields[2] = &patternText;
2086        numFields = uregex_splitUText(re, fields, 2, &status);
2087        TEST_ASSERT_SUCCESS(status);
2088
2089        /* The TEST_ASSERT_SUCCESS call above should change too... */
2090        if(U_SUCCESS(status)) {
2091            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2092            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2093            TEST_ASSERT(numFields == 2);
2094            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2095            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2096            TEST_ASSERT(fields[2] == &patternText);
2097        }
2098        for(i = 0; i < numFields; i++) {
2099            utext_close(fields[i]);
2100        }
2101
2102
2103        /*  Split with too few output strings available (3) */
2104        status = U_ZERO_ERROR;
2105        fields[0] = NULL;
2106        fields[1] = NULL;
2107        fields[2] = NULL;
2108        fields[3] = &patternText;
2109        numFields = uregex_splitUText(re, fields, 3, &status);
2110        TEST_ASSERT_SUCCESS(status);
2111
2112        /* The TEST_ASSERT_SUCCESS call above should change too... */
2113        if(U_SUCCESS(status)) {
2114            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2115            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2116            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2117            TEST_ASSERT(numFields == 3);
2118            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2119            TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2120            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2121            TEST_ASSERT(fields[3] == &patternText);
2122        }
2123        for(i = 0; i < numFields; i++) {
2124            utext_close(fields[i]);
2125        }
2126
2127        /*  Split with just enough output strings available (5) */
2128        status = U_ZERO_ERROR;
2129        fields[0] = NULL;
2130        fields[1] = NULL;
2131        fields[2] = NULL;
2132        fields[3] = NULL;
2133        fields[4] = NULL;
2134        fields[5] = &patternText;
2135        numFields = uregex_splitUText(re, fields, 5, &status);
2136        TEST_ASSERT_SUCCESS(status);
2137
2138        /* The TEST_ASSERT_SUCCESS call above should change too... */
2139        if(U_SUCCESS(status)) {
2140            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2141            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2142            const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2143            const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2144            const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2145
2146            TEST_ASSERT(numFields == 5);
2147            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2148            TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2149            TEST_ASSERT_UTEXT(str_second, fields[2]);
2150            TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2151            TEST_ASSERT_UTEXT(str_third, fields[4]);
2152            TEST_ASSERT(fields[5] == &patternText);
2153        }
2154        for(i = 0; i < numFields; i++) {
2155            utext_close(fields[i]);
2156        }
2157
2158        /* Split, end of text is a field delimiter.   */
2159        status = U_ZERO_ERROR;
2160        uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2161        TEST_ASSERT_SUCCESS(status);
2162
2163        /* The TEST_ASSERT_SUCCESS call above should change too... */
2164        if(U_SUCCESS(status)) {
2165            memset(fields, 0, sizeof(fields));
2166            fields[9] = &patternText;
2167            numFields = uregex_splitUText(re, fields, 9, &status);
2168            TEST_ASSERT_SUCCESS(status);
2169
2170            /* The TEST_ASSERT_SUCCESS call above should change too... */
2171            if(U_SUCCESS(status)) {
2172                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2173                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2174                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2175                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2176                const char str_empty[] = { 0x00 };
2177
2178                TEST_ASSERT(numFields == 5);
2179                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2180                TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2181                TEST_ASSERT_UTEXT(str_second, fields[2]);
2182                TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2183                TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2184                TEST_ASSERT(fields[5] == NULL);
2185                TEST_ASSERT(fields[8] == NULL);
2186                TEST_ASSERT(fields[9] == &patternText);
2187            }
2188            for(i = 0; i < numFields; i++) {
2189                utext_close(fields[i]);
2190            }
2191        }
2192
2193        uregex_close(re);
2194    }
2195    utext_close(&patternText);
2196}
2197
2198
2199static void TestRefreshInput(void) {
2200    /*
2201     *  RefreshInput changes out the input of a URegularExpression without
2202     *    changing anything else in the match state.  Used with Java JNI,
2203     *    when Java moves the underlying string storage.   This test
2204     *    runs a find() loop, moving the text after the first match.
2205     *    The right number of matches should still be found.
2206     */
2207    UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2208    UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2209    UErrorCode status = U_ZERO_ERROR;
2210    URegularExpression *re;
2211    UText ut1 = UTEXT_INITIALIZER;
2212    UText ut2 = UTEXT_INITIALIZER;
2213
2214    re = uregex_openC("[ABC]", 0, 0, &status);
2215    TEST_ASSERT_SUCCESS(status);
2216
2217    utext_openUChars(&ut1, testStr, -1, &status);
2218    TEST_ASSERT_SUCCESS(status);
2219    uregex_setUText(re, &ut1, &status);
2220    TEST_ASSERT_SUCCESS(status);
2221
2222    /* Find the first match "A" in the original string */
2223    TEST_ASSERT(uregex_findNext(re, &status));
2224    TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2225
2226    /* Move the string, kill the original string.  */
2227    u_strcpy(movedStr, testStr);
2228    u_memset(testStr, 0, u_strlen(testStr));
2229    utext_openUChars(&ut2, movedStr, -1, &status);
2230    TEST_ASSERT_SUCCESS(status);
2231    uregex_refreshUText(re, &ut2, &status);
2232    TEST_ASSERT_SUCCESS(status);
2233
2234    /* Find the following two matches, now working in the moved string. */
2235    TEST_ASSERT(uregex_findNext(re, &status));
2236    TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2237    TEST_ASSERT(uregex_findNext(re, &status));
2238    TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2239    TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2240
2241    uregex_close(re);
2242}
2243
2244
2245static void TestBug8421(void) {
2246    /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2247     *             was failing.
2248     */
2249    URegularExpression *re;
2250    UErrorCode status = U_ZERO_ERROR;
2251    int32_t  limit = -1;
2252
2253    re = uregex_openC("abc", 0, 0, &status);
2254    TEST_ASSERT_SUCCESS(status);
2255
2256    limit = uregex_getTimeLimit(re, &status);
2257    TEST_ASSERT_SUCCESS(status);
2258    TEST_ASSERT(limit == 0);
2259
2260    uregex_setTimeLimit(re, 100, &status);
2261    TEST_ASSERT_SUCCESS(status);
2262    limit = uregex_getTimeLimit(re, &status);
2263    TEST_ASSERT_SUCCESS(status);
2264    TEST_ASSERT(limit == 100);
2265
2266    uregex_close(re);
2267}
2268
2269
2270#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2271