1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2004-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File reapits.c
9*
10*********************************************************************************/
11/*C API TEST FOR Regular Expressions */
12/**
13*   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
14*   try to test the full functionality.  It just calls each function and verifies that it
15*   works on a basic level.
16*
17*   More complete testing of regular expression functionality is done with the C++ tests.
18**/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_REGULAR_EXPRESSIONS
23
24#include <stdlib.h>
25#include <string.h>
26#include "unicode/uloc.h"
27#include "unicode/uregex.h"
28#include "unicode/ustring.h"
29#include "unicode/utext.h"
30#include "cintltst.h"
31
/*
 * TEST_ASSERT_SUCCESS(status)
 *     If status holds a failure code (per U_FAILURE), report the source file,
 *     line and ICU error name through log_data_err.  Execution continues either way.
 *     Kept as a plain brace block so existing call sites compile unchanged.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
34
/*
 * TEST_ASSERT(expr)
 *     If expr evaluates to zero, report the source file and line through
 *     log_data_err.  Execution continues either way.
 *     Kept as a plain brace block (not do/while) because some call sites
 *     invoke it without a trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
37
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go  regular expression.
 *
 *         Requirements and behavior:
 *           - The enclosing scope must declare "status" (a UErrorCode) and
 *             "re" (a URegularExpression *); both are assigned by TEST_SETUP.
 *           - TEST_SETUP opens a scope and an "if (U_SUCCESS(status))" block that
 *             only TEST_TEARDOWN closes, so the two must always be used as a pair.
 *             The test code in between is skipped when setup failed.
 *           - The test string is converted into a malloc'ed UChar buffer
 *             ("srcString") which TEST_TEARDOWN frees after closing "re".
 *           - If that allocation fails, status is set to U_MEMORY_ALLOCATION_ERROR
 *             (unless it already holds a failure) instead of writing through NULL.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString,  (int32_t)strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
66
67
68/**
69 * @param expected utf-8 array of bytes to be expected
70 */
71static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
72     char     buf_inside_macro[120];
73     int32_t  len = (int32_t)strlen(expected);
74     UBool    success;
75     if (nulTerm) {
76         u_austrncpy(buf_inside_macro, (actual), len+1);
77         buf_inside_macro[len+2] = 0;
78         success = (strcmp((expected), buf_inside_macro) == 0);
79     } else {
80         u_austrncpy(buf_inside_macro, (actual), len);
81         buf_inside_macro[len+1] = 0;
82         success = (strncmp((expected), buf_inside_macro, len) == 0);
83     }
84     if (success == FALSE) {
85         log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
86             file, line, (expected), buf_inside_macro);
87     }
88}
89
90#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
91
92
93static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
94    UErrorCode status = U_ZERO_ERROR;
95    UText expectedText = UTEXT_INITIALIZER;
96    utext_openUTF8(&expectedText, expected, -1, &status);
97    utext_setNativeIndex(actual, 0);
98    if (utext_compare(&expectedText, -1, actual, -1) != 0) {
99        UChar32 c;
100        log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
101        c = utext_next32From(actual, 0);
102        while (c != U_SENTINEL) {
103            if (0x20<c && c <0x7e) {
104                log_err("%c", c);
105            } else {
106                log_err("%#x", c);
107            }
108            c = UTEXT_NEXT32(actual);
109        }
110        log_err("\"\n");
111    }
112    utext_close(&expectedText);
113}
114
115#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
116
117
118
119static void TestRegexCAPI(void);
120static void TestBug4315(void);
121static void TestUTextAPI(void);
122static void TestRefreshInput(void);
123static void TestBug8421(void);
124
125void addURegexTest(TestNode** root);
126
127void addURegexTest(TestNode** root)
128{
129    addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
130    addTest(root, &TestBug4315,   "regex/TestBug4315");
131    addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
132    addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
133    addTest(root, &TestBug8421,   "regex/TestBug8421");
134}
135
136/*
137 * Call back function and context struct used for testing
138 *    regular expression user callbacks.  This test is mostly the same as
139 *   the corresponding C++ test in intltest.
140 */
141typedef struct callBackContext {
142    int32_t          maxCalls;
143    int32_t          numCalls;
144    int32_t          lastSteps;
145} callBackContext;
146
147static UBool U_EXPORT2 U_CALLCONV
148TestCallbackFn(const void *context, int32_t steps) {
149  callBackContext  *info = (callBackContext *)context;
150  if (info->lastSteps+1 != steps) {
151      log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
152  }
153  info->lastSteps = steps;
154  info->numCalls++;
155  return (info->numCalls < info->maxCalls);
156}
157
158/*
159 *   Regular Expression C API Tests
160 */
161static void TestRegexCAPI(void) {
162    UErrorCode           status = U_ZERO_ERROR;
163    URegularExpression  *re;
164    UChar                pat[200];
165    UChar               *minus1;
166
167    memset(&minus1, -1, sizeof(minus1));
168
169    /* Mimimalist open/close */
170    u_uastrncpy(pat, "abc*", sizeof(pat)/2);
171    re = uregex_open(pat, -1, 0, 0, &status);
172    if (U_FAILURE(status)) {
173         log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
174         return;
175    }
176    uregex_close(re);
177
178    /* Open with all flag values set */
179    status = U_ZERO_ERROR;
180    re = uregex_open(pat, -1,
181        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
182        0, &status);
183    TEST_ASSERT_SUCCESS(status);
184    uregex_close(re);
185
186    /* Open with an invalid flag */
187    status = U_ZERO_ERROR;
188    re = uregex_open(pat, -1, 0x40000000, 0, &status);
189    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
190    uregex_close(re);
191
192    /* Open with an unimplemented flag */
193    status = U_ZERO_ERROR;
194    re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status);
195    TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
196    uregex_close(re);
197
198    /* openC with an invalid parameter */
199    status = U_ZERO_ERROR;
200    re = uregex_openC(NULL,
201        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
202    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
203
204    /* openC with an invalid parameter */
205    status = U_USELESS_COLLATOR_ERROR;
206    re = uregex_openC(NULL,
207        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
208    TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
209
210    /* openC   open from a C string */
211    {
212        const UChar   *p;
213        int32_t  len;
214        status = U_ZERO_ERROR;
215        re = uregex_openC("abc*", 0, 0, &status);
216        TEST_ASSERT_SUCCESS(status);
217        p = uregex_pattern(re, &len, &status);
218        TEST_ASSERT_SUCCESS(status);
219
220        /* The TEST_ASSERT_SUCCESS above should change too... */
221        if(U_SUCCESS(status)) {
222            u_uastrncpy(pat, "abc*", sizeof(pat)/2);
223            TEST_ASSERT(u_strcmp(pat, p) == 0);
224            TEST_ASSERT(len==(int32_t)strlen("abc*"));
225        }
226
227        uregex_close(re);
228
229        /*  TODO:  Open with ParseError parameter */
230    }
231
232    /*
233     *  clone
234     */
235    {
236        URegularExpression *clone1;
237        URegularExpression *clone2;
238        URegularExpression *clone3;
239        UChar  testString1[30];
240        UChar  testString2[30];
241        UBool  result;
242
243
244        status = U_ZERO_ERROR;
245        re = uregex_openC("abc*", 0, 0, &status);
246        TEST_ASSERT_SUCCESS(status);
247        clone1 = uregex_clone(re, &status);
248        TEST_ASSERT_SUCCESS(status);
249        TEST_ASSERT(clone1 != NULL);
250
251        status = U_ZERO_ERROR;
252        clone2 = uregex_clone(re, &status);
253        TEST_ASSERT_SUCCESS(status);
254        TEST_ASSERT(clone2 != NULL);
255        uregex_close(re);
256
257        status = U_ZERO_ERROR;
258        clone3 = uregex_clone(clone2, &status);
259        TEST_ASSERT_SUCCESS(status);
260        TEST_ASSERT(clone3 != NULL);
261
262        u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
263        u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
264
265        status = U_ZERO_ERROR;
266        uregex_setText(clone1, testString1, -1, &status);
267        TEST_ASSERT_SUCCESS(status);
268        result = uregex_lookingAt(clone1, 0, &status);
269        TEST_ASSERT_SUCCESS(status);
270        TEST_ASSERT(result==TRUE);
271
272        status = U_ZERO_ERROR;
273        uregex_setText(clone2, testString2, -1, &status);
274        TEST_ASSERT_SUCCESS(status);
275        result = uregex_lookingAt(clone2, 0, &status);
276        TEST_ASSERT_SUCCESS(status);
277        TEST_ASSERT(result==FALSE);
278        result = uregex_find(clone2, 0, &status);
279        TEST_ASSERT_SUCCESS(status);
280        TEST_ASSERT(result==TRUE);
281
282        uregex_close(clone1);
283        uregex_close(clone2);
284        uregex_close(clone3);
285
286    }
287
288    /*
289     *  pattern()
290    */
291    {
292        const UChar  *resultPat;
293        int32_t       resultLen;
294        u_uastrncpy(pat, "hello", sizeof(pat)/2);
295        status = U_ZERO_ERROR;
296        re = uregex_open(pat, -1, 0, NULL, &status);
297        resultPat = uregex_pattern(re, &resultLen, &status);
298        TEST_ASSERT_SUCCESS(status);
299
300        /* The TEST_ASSERT_SUCCESS above should change too... */
301        if (U_SUCCESS(status)) {
302            TEST_ASSERT(resultLen == -1);
303            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
304        }
305
306        uregex_close(re);
307
308        status = U_ZERO_ERROR;
309        re = uregex_open(pat, 3, 0, NULL, &status);
310        resultPat = uregex_pattern(re, &resultLen, &status);
311        TEST_ASSERT_SUCCESS(status);
312        TEST_ASSERT_SUCCESS(status);
313
314        /* The TEST_ASSERT_SUCCESS above should change too... */
315        if (U_SUCCESS(status)) {
316            TEST_ASSERT(resultLen == 3);
317            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
318            TEST_ASSERT(u_strlen(resultPat) == 3);
319        }
320
321        uregex_close(re);
322    }
323
324    /*
325     *  flags()
326     */
327    {
328        int32_t  t;
329
330        status = U_ZERO_ERROR;
331        re = uregex_open(pat, -1, 0, NULL, &status);
332        t  = uregex_flags(re, &status);
333        TEST_ASSERT_SUCCESS(status);
334        TEST_ASSERT(t == 0);
335        uregex_close(re);
336
337        status = U_ZERO_ERROR;
338        re = uregex_open(pat, -1, 0, NULL, &status);
339        t  = uregex_flags(re, &status);
340        TEST_ASSERT_SUCCESS(status);
341        TEST_ASSERT(t == 0);
342        uregex_close(re);
343
344        status = U_ZERO_ERROR;
345        re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
346        t  = uregex_flags(re, &status);
347        TEST_ASSERT_SUCCESS(status);
348        TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
349        uregex_close(re);
350    }
351
352    /*
353     *  setText() and lookingAt()
354     */
355    {
356        UChar  text1[50];
357        UChar  text2[50];
358        UBool  result;
359
360        u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
361        u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
362        status = U_ZERO_ERROR;
363        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
364        re = uregex_open(pat, -1, 0, NULL, &status);
365        TEST_ASSERT_SUCCESS(status);
366
367        /* Operation before doing a setText should fail... */
368        status = U_ZERO_ERROR;
369        uregex_lookingAt(re, 0, &status);
370        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
371
372        status = U_ZERO_ERROR;
373        uregex_setText(re, text1, -1, &status);
374        result = uregex_lookingAt(re, 0, &status);
375        TEST_ASSERT(result == TRUE);
376        TEST_ASSERT_SUCCESS(status);
377
378        status = U_ZERO_ERROR;
379        uregex_setText(re, text2, -1, &status);
380        result = uregex_lookingAt(re, 0, &status);
381        TEST_ASSERT(result == FALSE);
382        TEST_ASSERT_SUCCESS(status);
383
384        status = U_ZERO_ERROR;
385        uregex_setText(re, text1, -1, &status);
386        result = uregex_lookingAt(re, 0, &status);
387        TEST_ASSERT(result == TRUE);
388        TEST_ASSERT_SUCCESS(status);
389
390        status = U_ZERO_ERROR;
391        uregex_setText(re, text1, 5, &status);
392        result = uregex_lookingAt(re, 0, &status);
393        TEST_ASSERT(result == FALSE);
394        TEST_ASSERT_SUCCESS(status);
395
396        status = U_ZERO_ERROR;
397        uregex_setText(re, text1, 6, &status);
398        result = uregex_lookingAt(re, 0, &status);
399        TEST_ASSERT(result == TRUE);
400        TEST_ASSERT_SUCCESS(status);
401
402        uregex_close(re);
403    }
404
405
406    /*
407     *  getText()
408     */
409    {
410        UChar    text1[50];
411        UChar    text2[50];
412        const UChar   *result;
413        int32_t  textLength;
414
415        u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
416        u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
417        status = U_ZERO_ERROR;
418        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
419        re = uregex_open(pat, -1, 0, NULL, &status);
420
421        uregex_setText(re, text1, -1, &status);
422        result = uregex_getText(re, &textLength, &status);
423        TEST_ASSERT(result == text1);
424        TEST_ASSERT(textLength == -1);
425        TEST_ASSERT_SUCCESS(status);
426
427        status = U_ZERO_ERROR;
428        uregex_setText(re, text2, 7, &status);
429        result = uregex_getText(re, &textLength, &status);
430        TEST_ASSERT(result == text2);
431        TEST_ASSERT(textLength == 7);
432        TEST_ASSERT_SUCCESS(status);
433
434        status = U_ZERO_ERROR;
435        uregex_setText(re, text2, 4, &status);
436        result = uregex_getText(re, &textLength, &status);
437        TEST_ASSERT(result == text2);
438        TEST_ASSERT(textLength == 4);
439        TEST_ASSERT_SUCCESS(status);
440        uregex_close(re);
441    }
442
443    /*
444     *  matches()
445     */
446    {
447        UChar   text1[50];
448        UBool   result;
449        int     len;
450        UChar   nullString[] = {0,0,0};
451
452        u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
453        status = U_ZERO_ERROR;
454        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
455        re = uregex_open(pat, -1, 0, NULL, &status);
456
457        uregex_setText(re, text1, -1, &status);
458        result = uregex_matches(re, 0, &status);
459        TEST_ASSERT(result == FALSE);
460        TEST_ASSERT_SUCCESS(status);
461
462        status = U_ZERO_ERROR;
463        uregex_setText(re, text1, 6, &status);
464        result = uregex_matches(re, 0, &status);
465        TEST_ASSERT(result == TRUE);
466        TEST_ASSERT_SUCCESS(status);
467
468        status = U_ZERO_ERROR;
469        uregex_setText(re, text1, 6, &status);
470        result = uregex_matches(re, 1, &status);
471        TEST_ASSERT(result == FALSE);
472        TEST_ASSERT_SUCCESS(status);
473        uregex_close(re);
474
475        status = U_ZERO_ERROR;
476        re = uregex_openC(".?", 0, NULL, &status);
477        uregex_setText(re, text1, -1, &status);
478        len = u_strlen(text1);
479        result = uregex_matches(re, len, &status);
480        TEST_ASSERT(result == TRUE);
481        TEST_ASSERT_SUCCESS(status);
482
483        status = U_ZERO_ERROR;
484        uregex_setText(re, nullString, -1, &status);
485        TEST_ASSERT_SUCCESS(status);
486        result = uregex_matches(re, 0, &status);
487        TEST_ASSERT(result == TRUE);
488        TEST_ASSERT_SUCCESS(status);
489        uregex_close(re);
490    }
491
492
493    /*
494     *  lookingAt()    Used in setText test.
495     */
496
497
498    /*
499     *  find(), findNext, start, end, reset
500     */
501    {
502        UChar    text1[50];
503        UBool    result;
504        u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
505        status = U_ZERO_ERROR;
506        re = uregex_openC("rx", 0, NULL, &status);
507
508        uregex_setText(re, text1, -1, &status);
509        result = uregex_find(re, 0, &status);
510        TEST_ASSERT(result == TRUE);
511        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
512        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
513        TEST_ASSERT_SUCCESS(status);
514
515        result = uregex_find(re, 9, &status);
516        TEST_ASSERT(result == TRUE);
517        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
518        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
519        TEST_ASSERT_SUCCESS(status);
520
521        result = uregex_find(re, 14, &status);
522        TEST_ASSERT(result == FALSE);
523        TEST_ASSERT_SUCCESS(status);
524
525        status = U_ZERO_ERROR;
526        uregex_reset(re, 0, &status);
527
528        result = uregex_findNext(re, &status);
529        TEST_ASSERT(result == TRUE);
530        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
531        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
532        TEST_ASSERT_SUCCESS(status);
533
534        result = uregex_findNext(re, &status);
535        TEST_ASSERT(result == TRUE);
536        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
537        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
538        TEST_ASSERT_SUCCESS(status);
539
540        status = U_ZERO_ERROR;
541        uregex_reset(re, 12, &status);
542
543        result = uregex_findNext(re, &status);
544        TEST_ASSERT(result == TRUE);
545        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
546        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
547        TEST_ASSERT_SUCCESS(status);
548
549        result = uregex_findNext(re, &status);
550        TEST_ASSERT(result == FALSE);
551        TEST_ASSERT_SUCCESS(status);
552
553        uregex_close(re);
554    }
555
556    /*
557     *  groupCount
558     */
559    {
560        int32_t result;
561
562        status = U_ZERO_ERROR;
563        re = uregex_openC("abc", 0, NULL, &status);
564        result = uregex_groupCount(re, &status);
565        TEST_ASSERT_SUCCESS(status);
566        TEST_ASSERT(result == 0);
567        uregex_close(re);
568
569        status = U_ZERO_ERROR;
570        re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
571        result = uregex_groupCount(re, &status);
572        TEST_ASSERT_SUCCESS(status);
573        TEST_ASSERT(result == 3);
574        uregex_close(re);
575
576    }
577
578
579    /*
580     *  group()
581     */
582    {
583        UChar    text1[80];
584        UChar    buf[80];
585        UBool    result;
586        int32_t  resultSz;
587        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
588
589        status = U_ZERO_ERROR;
590        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
591        TEST_ASSERT_SUCCESS(status);
592
593
594        uregex_setText(re, text1, -1, &status);
595        result = uregex_find(re, 0, &status);
596        TEST_ASSERT(result==TRUE);
597
598        /*  Capture Group 0, the full match.  Should succeed.  */
599        status = U_ZERO_ERROR;
600        resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
601        TEST_ASSERT_SUCCESS(status);
602        TEST_ASSERT_STRING("abc interior def", buf, TRUE);
603        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
604
605        /*  Capture group #1.  Should succeed. */
606        status = U_ZERO_ERROR;
607        resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
608        TEST_ASSERT_SUCCESS(status);
609        TEST_ASSERT_STRING(" interior ", buf, TRUE);
610        TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
611
612        /*  Capture group out of range.  Error. */
613        status = U_ZERO_ERROR;
614        uregex_group(re, 2, buf, sizeof(buf)/2, &status);
615        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
616
617        /* NULL buffer, pure pre-flight */
618        status = U_ZERO_ERROR;
619        resultSz = uregex_group(re, 0, NULL, 0, &status);
620        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
621        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
622
623        /* Too small buffer, truncated string */
624        status = U_ZERO_ERROR;
625        memset(buf, -1, sizeof(buf));
626        resultSz = uregex_group(re, 0, buf, 5, &status);
627        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
628        TEST_ASSERT_STRING("abc i", buf, FALSE);
629        TEST_ASSERT(buf[5] == (UChar)0xffff);
630        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
631
632        /* Output string just fits buffer, no NUL term. */
633        status = U_ZERO_ERROR;
634        resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
635        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
636        TEST_ASSERT_STRING("abc interior def", buf, FALSE);
637        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
638        TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
639
640        uregex_close(re);
641
642    }
643
644    /*
645     *  Regions
646     */
647
648
649        /* SetRegion(), getRegion() do something  */
650        TEST_SETUP(".*", "0123456789ABCDEF", 0)
651        UChar resultString[40];
652        TEST_ASSERT(uregex_regionStart(re, &status) == 0);
653        TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
654        uregex_setRegion(re, 3, 6, &status);
655        TEST_ASSERT(uregex_regionStart(re, &status) == 3);
656        TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
657        TEST_ASSERT(uregex_findNext(re, &status));
658        TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
659        TEST_ASSERT_STRING("345", resultString, TRUE);
660        TEST_TEARDOWN;
661
662        /* find(start=-1) uses regions   */
663        TEST_SETUP(".*", "0123456789ABCDEF", 0);
664        uregex_setRegion(re, 4, 6, &status);
665        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
666        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
667        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
668        TEST_TEARDOWN;
669
670        /* find (start >=0) does not use regions   */
671        TEST_SETUP(".*", "0123456789ABCDEF", 0);
672        uregex_setRegion(re, 4, 6, &status);
673        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
674        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
675        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
676        TEST_TEARDOWN;
677
678        /* findNext() obeys regions    */
679        TEST_SETUP(".", "0123456789ABCDEF", 0);
680        uregex_setRegion(re, 4, 6, &status);
681        TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
682        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
683        TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
684        TEST_ASSERT(uregex_start(re, 0, &status) == 5);
685        TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
686        TEST_TEARDOWN;
687
688        /* matches(start=-1) uses regions                                           */
689        /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
690        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
691        uregex_setRegion(re, 4, 6, &status);
692        TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
693        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
694        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
695        TEST_TEARDOWN;
696
697        /* matches (start >=0) does not use regions       */
698        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
699        uregex_setRegion(re, 4, 6, &status);
700        TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
701        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
702        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
703        TEST_TEARDOWN;
704
705        /* lookingAt(start=-1) uses regions                                         */
706        /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
707        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
708        uregex_setRegion(re, 4, 6, &status);
709        TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
710        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
711        TEST_ASSERT(uregex_end(re, 0, &status) == 4);
712        TEST_TEARDOWN;
713
714        /* lookingAt (start >=0) does not use regions  */
715        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
716        uregex_setRegion(re, 4, 6, &status);
717        TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
718        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
719        TEST_ASSERT(uregex_end(re, 0, &status) == 0);
720        TEST_TEARDOWN;
721
722        /* hitEnd()       */
723        TEST_SETUP("[a-f]*", "abcdefghij", 0);
724        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
725        TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
726        TEST_TEARDOWN;
727
728        TEST_SETUP("[a-f]*", "abcdef", 0);
729        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
730        TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
731        TEST_TEARDOWN;
732
733        /* requireEnd   */
734        TEST_SETUP("abcd", "abcd", 0);
735        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
736        TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
737        TEST_TEARDOWN;
738
739        TEST_SETUP("abcd$", "abcd", 0);
740        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
741        TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
742        TEST_TEARDOWN;
743
744        /* anchoringBounds        */
745        TEST_SETUP("abc$", "abcdef", 0);
746        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
747        uregex_useAnchoringBounds(re, FALSE, &status);
748        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
749
750        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
751        uregex_useAnchoringBounds(re, TRUE, &status);
752        uregex_setRegion(re, 0, 3, &status);
753        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
754        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
755        TEST_TEARDOWN;
756
757        /* Transparent Bounds      */
758        TEST_SETUP("abc(?=def)", "abcdef", 0);
759        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
760        uregex_useTransparentBounds(re, TRUE, &status);
761        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
762
763        uregex_useTransparentBounds(re, FALSE, &status);
764        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
765        uregex_setRegion(re, 0, 3, &status);
766        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
767        uregex_useTransparentBounds(re, TRUE, &status);
768        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
769        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
770        TEST_TEARDOWN;
771
772
773    /*
774     *  replaceFirst()
775     */
776    {
777        UChar    text1[80];
778        UChar    text2[80];
779        UChar    replText[80];
780        UChar    buf[80];
781        int32_t  resultSz;
782        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
783        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
784        u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
785
786        status = U_ZERO_ERROR;
787        re = uregex_openC("x(.*?)x", 0, NULL, &status);
788        TEST_ASSERT_SUCCESS(status);
789
790        /*  Normal case, with match */
791        uregex_setText(re, text1, -1, &status);
792        resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
793        TEST_ASSERT_SUCCESS(status);
794        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
795        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
796
797        /* No match.  Text should copy to output with no changes.  */
798        status = U_ZERO_ERROR;
799        uregex_setText(re, text2, -1, &status);
800        resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
801        TEST_ASSERT_SUCCESS(status);
802        TEST_ASSERT_STRING("No match here.", buf, TRUE);
803        TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
804
805        /*  Match, output just fills buffer, no termination warning. */
806        status = U_ZERO_ERROR;
807        uregex_setText(re, text1, -1, &status);
808        memset(buf, -1, sizeof(buf));
809        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
810        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
811        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
812        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
813        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
814
815        /* Do the replaceFirst again, without first resetting anything.
816         *  Should give the same results.
817         */
818        status = U_ZERO_ERROR;
819        memset(buf, -1, sizeof(buf));
820        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
821        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
822        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
823        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
824        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
825
826        /* NULL buffer, zero buffer length */
827        status = U_ZERO_ERROR;
828        resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
829        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
830        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
831
832        /* Buffer too small by one */
833        status = U_ZERO_ERROR;
834        memset(buf, -1, sizeof(buf));
835        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
836        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837        TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
838        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
839        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
840
841        uregex_close(re);
842    }
843
844
845    /*
846     *  replaceAll()
847     */
848    {
849        UChar    text1[80];          /*  "Replace xaax x1x x...x." */
850        UChar    text2[80];          /*  "No match Here"           */
851        UChar    replText[80];       /*  "<$1>"                    */
852        UChar    replText2[80];      /*  "<<$1>>"                  */
853        const char * pattern = "x(.*?)x";
854        const char * expectedResult = "Replace <aa> <1> <...>.";
855        const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
856        UChar    buf[80];
857        int32_t  resultSize;
858        int32_t  expectedResultSize;
859        int32_t  expectedResultSize2;
860        int32_t  i;
861
862        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
863        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
864        u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
865        u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
866        expectedResultSize = strlen(expectedResult);
867        expectedResultSize2 = strlen(expectedResult2);
868
869        status = U_ZERO_ERROR;
870        re = uregex_openC(pattern, 0, NULL, &status);
871        TEST_ASSERT_SUCCESS(status);
872
873        /*  Normal case, with match */
874        uregex_setText(re, text1, -1, &status);
875        resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
876        TEST_ASSERT_SUCCESS(status);
877        TEST_ASSERT_STRING(expectedResult, buf, TRUE);
878        TEST_ASSERT(resultSize == expectedResultSize);
879
880        /* No match.  Text should copy to output with no changes.  */
881        status = U_ZERO_ERROR;
882        uregex_setText(re, text2, -1, &status);
883        resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
884        TEST_ASSERT_SUCCESS(status);
885        TEST_ASSERT_STRING("No match here.", buf, TRUE);
886        TEST_ASSERT(resultSize == u_strlen(text2));
887
888        /*  Match, output just fills buffer, no termination warning. */
889        status = U_ZERO_ERROR;
890        uregex_setText(re, text1, -1, &status);
891        memset(buf, -1, sizeof(buf));
892        resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
893        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
894        TEST_ASSERT_STRING(expectedResult, buf, FALSE);
895        TEST_ASSERT(resultSize == expectedResultSize);
896        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
897
        /* Do the replaceAll again, without first resetting anything.
         *  Should give the same results.
         */
901        status = U_ZERO_ERROR;
902        memset(buf, -1, sizeof(buf));
903        resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
904        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
905        TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
906        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
907        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
908
909        /* NULL buffer, zero buffer length */
910        status = U_ZERO_ERROR;
911        resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
912        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
913        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
914
915        /* Buffer too small.  Try every size, which will tickle edge cases
916         * in uregex_appendReplacement (used by replaceAll)   */
917        for (i=0; i<expectedResultSize; i++) {
918            char  expected[80];
919            status = U_ZERO_ERROR;
920            memset(buf, -1, sizeof(buf));
921            resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
922            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
923            strcpy(expected, expectedResult);
924            expected[i] = 0;
925            TEST_ASSERT_STRING(expected, buf, FALSE);
926            TEST_ASSERT(resultSize == expectedResultSize);
927            TEST_ASSERT(buf[i] == (UChar)0xffff);
928        }
929
930        /* Buffer too small.  Same as previous test, except this time the replacement
931         * text is longer than the match capture group, making the length of the complete
932         * replacement longer than the original string.
933         */
934        for (i=0; i<expectedResultSize2; i++) {
935            char  expected[80];
936            status = U_ZERO_ERROR;
937            memset(buf, -1, sizeof(buf));
938            resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
939            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
940            strcpy(expected, expectedResult2);
941            expected[i] = 0;
942            TEST_ASSERT_STRING(expected, buf, FALSE);
943            TEST_ASSERT(resultSize == expectedResultSize2);
944            TEST_ASSERT(buf[i] == (UChar)0xffff);
945        }
946
947
948        uregex_close(re);
949    }
950
951
952    /*
953     *  appendReplacement()
954     */
955    {
956        UChar    text[100];
957        UChar    repl[100];
958        UChar    buf[100];
959        UChar   *bufPtr;
960        int32_t  bufCap;
961
962
963        status = U_ZERO_ERROR;
964        re = uregex_openC(".*", 0, 0, &status);
965        TEST_ASSERT_SUCCESS(status);
966
967        u_uastrncpy(text, "whatever",  sizeof(text)/2);
968        u_uastrncpy(repl, "some other", sizeof(repl)/2);
969        uregex_setText(re, text, -1, &status);
970
971        /* match covers whole target string */
972        uregex_find(re, 0, &status);
973        TEST_ASSERT_SUCCESS(status);
974        bufPtr = buf;
975        bufCap = sizeof(buf) / 2;
976        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
977        TEST_ASSERT_SUCCESS(status);
978        TEST_ASSERT_STRING("some other", buf, TRUE);
979
980        /* Match has \u \U escapes */
981        uregex_find(re, 0, &status);
982        TEST_ASSERT_SUCCESS(status);
983        bufPtr = buf;
984        bufCap = sizeof(buf) / 2;
985        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
986        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
987        TEST_ASSERT_SUCCESS(status);
988        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
989
990        /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
991        status = U_ZERO_ERROR;
992        uregex_find(re, 0, &status);
993        TEST_ASSERT_SUCCESS(status);
994        bufPtr = buf;
995        status = U_BUFFER_OVERFLOW_ERROR;
996        uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
997        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
998
999        uregex_close(re);
1000    }
1001
1002
1003    /*
1004     *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1005     */
1006
1007    /*
1008     *  split()
1009     */
1010    {
1011        UChar    textToSplit[80];
1012        UChar    text2[80];
1013        UChar    buf[200];
1014        UChar    *fields[10];
1015        int32_t  numFields;
1016        int32_t  requiredCapacity;
1017        int32_t  spaceNeeded;
1018        int32_t  sz;
1019
1020        u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1021        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1022
1023        status = U_ZERO_ERROR;
1024        re = uregex_openC(":", 0, NULL, &status);
1025
1026
1027        /*  Simple split */
1028
1029        uregex_setText(re, textToSplit, -1, &status);
1030        TEST_ASSERT_SUCCESS(status);
1031
1032        /* The TEST_ASSERT_SUCCESS call above should change too... */
1033        if (U_SUCCESS(status)) {
1034            memset(fields, -1, sizeof(fields));
1035            numFields =
1036                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1037            TEST_ASSERT_SUCCESS(status);
1038
1039            /* The TEST_ASSERT_SUCCESS call above should change too... */
1040            if(U_SUCCESS(status)) {
1041                TEST_ASSERT(numFields == 3);
1042                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1043                TEST_ASSERT_STRING(" second", fields[1], TRUE);
1044                TEST_ASSERT_STRING("  third", fields[2], TRUE);
1045                TEST_ASSERT(fields[3] == NULL);
1046
1047                spaceNeeded = u_strlen(textToSplit) -
1048                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1049                            numFields;          /* Each field gets a NUL terminator */
1050
1051                TEST_ASSERT(spaceNeeded == requiredCapacity);
1052            }
1053        }
1054
1055        uregex_close(re);
1056
1057
1058        /*  Split with too few output strings available */
1059        status = U_ZERO_ERROR;
1060        re = uregex_openC(":", 0, NULL, &status);
1061        uregex_setText(re, textToSplit, -1, &status);
1062        TEST_ASSERT_SUCCESS(status);
1063
1064        /* The TEST_ASSERT_SUCCESS call above should change too... */
1065        if(U_SUCCESS(status)) {
1066            memset(fields, -1, sizeof(fields));
1067            numFields =
1068                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1069            TEST_ASSERT_SUCCESS(status);
1070
1071            /* The TEST_ASSERT_SUCCESS call above should change too... */
1072            if(U_SUCCESS(status)) {
1073                TEST_ASSERT(numFields == 2);
1074                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1075                TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1076                TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1077
1078                spaceNeeded = u_strlen(textToSplit) -
1079                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1080                            numFields;          /* Each field gets a NUL terminator */
1081
1082                TEST_ASSERT(spaceNeeded == requiredCapacity);
1083
1084                /* Split with a range of output buffer sizes.  */
1085                spaceNeeded = u_strlen(textToSplit) -
1086                    (numFields - 1)  +  /* Field delimiters do not appear in output */
1087                    numFields;          /* Each field gets a NUL terminator */
1088
1089                for (sz=0; sz < spaceNeeded+1; sz++) {
1090                    memset(fields, -1, sizeof(fields));
1091                    status = U_ZERO_ERROR;
1092                    numFields =
1093                        uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1094                    if (sz >= spaceNeeded) {
1095                        TEST_ASSERT_SUCCESS(status);
1096                        TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1097                        TEST_ASSERT_STRING(" second", fields[1], TRUE);
1098                        TEST_ASSERT_STRING("  third", fields[2], TRUE);
1099                    } else {
1100                        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1101                    }
1102                    TEST_ASSERT(numFields == 3);
1103                    TEST_ASSERT(fields[3] == NULL);
1104                    TEST_ASSERT(spaceNeeded == requiredCapacity);
1105                }
1106            }
1107        }
1108
1109        uregex_close(re);
1110    }
1111
1112
1113
1114
1115    /* Split(), part 2.  Patterns with capture groups.  The capture group text
1116     *                   comes out as additional fields.  */
1117    {
1118        UChar    textToSplit[80];
1119        UChar    buf[200];
1120        UChar    *fields[10];
1121        int32_t  numFields;
1122        int32_t  requiredCapacity;
1123        int32_t  spaceNeeded;
1124        int32_t  sz;
1125
1126        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
1127
1128        status = U_ZERO_ERROR;
1129        re = uregex_openC("<(.*?)>", 0, NULL, &status);
1130
1131        uregex_setText(re, textToSplit, -1, &status);
1132        TEST_ASSERT_SUCCESS(status);
1133
1134        /* The TEST_ASSERT_SUCCESS call above should change too... */
1135        if(U_SUCCESS(status)) {
1136            memset(fields, -1, sizeof(fields));
1137            numFields =
1138                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1139            TEST_ASSERT_SUCCESS(status);
1140
1141            /* The TEST_ASSERT_SUCCESS call above should change too... */
1142            if(U_SUCCESS(status)) {
1143                TEST_ASSERT(numFields == 5);
1144                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1145                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1146                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1147                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1148                TEST_ASSERT_STRING("  third", fields[4], TRUE);
1149                TEST_ASSERT(fields[5] == NULL);
1150                spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1151                TEST_ASSERT(spaceNeeded == requiredCapacity);
1152            }
1153        }
1154
1155        /*  Split with too few output strings available (2) */
1156        status = U_ZERO_ERROR;
1157        memset(fields, -1, sizeof(fields));
1158        numFields =
1159            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1160        TEST_ASSERT_SUCCESS(status);
1161
1162        /* The TEST_ASSERT_SUCCESS call above should change too... */
1163        if(U_SUCCESS(status)) {
1164            TEST_ASSERT(numFields == 2);
1165            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1166            TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1167            TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1168
1169            spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1170            TEST_ASSERT(spaceNeeded == requiredCapacity);
1171        }
1172
1173        /*  Split with too few output strings available (3) */
1174        status = U_ZERO_ERROR;
1175        memset(fields, -1, sizeof(fields));
1176        numFields =
1177            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
1178        TEST_ASSERT_SUCCESS(status);
1179
1180        /* The TEST_ASSERT_SUCCESS call above should change too... */
1181        if(U_SUCCESS(status)) {
1182            TEST_ASSERT(numFields == 3);
1183            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1184            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1185            TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1186            TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1187
1188            spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1189            TEST_ASSERT(spaceNeeded == requiredCapacity);
1190        }
1191
1192        /*  Split with just enough output strings available (5) */
1193        status = U_ZERO_ERROR;
1194        memset(fields, -1, sizeof(fields));
1195        numFields =
1196            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
1197        TEST_ASSERT_SUCCESS(status);
1198
1199        /* The TEST_ASSERT_SUCCESS call above should change too... */
1200        if(U_SUCCESS(status)) {
1201            TEST_ASSERT(numFields == 5);
1202            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1203            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1204            TEST_ASSERT_STRING(" second", fields[2], TRUE);
1205            TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1206            TEST_ASSERT_STRING("  third", fields[4], TRUE);
1207            TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1208
1209            spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1210            TEST_ASSERT(spaceNeeded == requiredCapacity);
1211        }
1212
1213        /* Split, end of text is a field delimiter.   */
1214        status = U_ZERO_ERROR;
1215        sz = strlen("first <tag-a> second<tag-b>");
1216        uregex_setText(re, textToSplit, sz, &status);
1217        TEST_ASSERT_SUCCESS(status);
1218
1219        /* The TEST_ASSERT_SUCCESS call above should change too... */
1220        if(U_SUCCESS(status)) {
1221            memset(fields, -1, sizeof(fields));
1222            numFields =
1223                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
1224            TEST_ASSERT_SUCCESS(status);
1225
1226            /* The TEST_ASSERT_SUCCESS call above should change too... */
1227            if(U_SUCCESS(status)) {
1228                TEST_ASSERT(numFields == 5);
1229                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1230                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1231                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1232                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1233                TEST_ASSERT_STRING("",        fields[4], TRUE);
1234                TEST_ASSERT(fields[5] == NULL);
1235                TEST_ASSERT(fields[8] == NULL);
1236                TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1237                spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1238                TEST_ASSERT(spaceNeeded == requiredCapacity);
1239            }
1240        }
1241
1242        uregex_close(re);
1243    }
1244
1245    /*
1246     * set/getTimeLimit
1247     */
1248     TEST_SETUP("abc$", "abcdef", 0);
1249     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1250     uregex_setTimeLimit(re, 1000, &status);
1251     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1252     TEST_ASSERT_SUCCESS(status);
1253     uregex_setTimeLimit(re, -1, &status);
1254     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1255     status = U_ZERO_ERROR;
1256     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1257     TEST_TEARDOWN;
1258
1259     /*
1260      * set/get Stack Limit
1261      */
1262     TEST_SETUP("abc$", "abcdef", 0);
1263     TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1264     uregex_setStackLimit(re, 40000, &status);
1265     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1266     TEST_ASSERT_SUCCESS(status);
1267     uregex_setStackLimit(re, -1, &status);
1268     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1269     status = U_ZERO_ERROR;
1270     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1271     TEST_TEARDOWN;
1272
1273
1274     /*
1275      * Get/Set callback functions
1276      *     This test is copied from intltest regex/Callbacks
1277      *     The pattern and test data will run long enough to cause the callback
1278      *       to be invoked.  The nested '+' operators give exponential time
1279      *       behavior with increasing string length.
1280      */
1281     TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1282     callBackContext cbInfo = {4, 0, 0};
1283     const void     *pContext   = &cbInfo;
1284     URegexMatchCallback    *returnedFn = &TestCallbackFn;
1285
1286     /*  Getting the callback fn when it hasn't been set must return NULL  */
1287     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1288     TEST_ASSERT_SUCCESS(status);
1289     TEST_ASSERT(returnedFn == NULL);
1290     TEST_ASSERT(pContext == NULL);
1291
     /* Set the callback and do a match.                                  */
1293     /* The callback function should record that it has been called.      */
1294     uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1295     TEST_ASSERT_SUCCESS(status);
1296     TEST_ASSERT(cbInfo.numCalls == 0);
1297     TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1298     TEST_ASSERT_SUCCESS(status);
1299     TEST_ASSERT(cbInfo.numCalls > 0);
1300
1301     /* Getting the callback should return the values that were set above.  */
1302     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1303     TEST_ASSERT(returnedFn == &TestCallbackFn);
1304     TEST_ASSERT(pContext == &cbInfo);
1305
1306     TEST_TEARDOWN;
1307}
1308
1309
1310
1311static void TestBug4315(void) {
1312    UErrorCode      theICUError = U_ZERO_ERROR;
1313    URegularExpression *theRegEx;
1314    UChar           *textBuff;
1315    const char      *thePattern;
1316    UChar            theString[100];
1317    UChar           *destFields[24];
1318    int32_t         neededLength1;
1319    int32_t         neededLength2;
1320
1321    int32_t         wordCount = 0;
1322    int32_t         destFieldsSize = 24;
1323
1324    thePattern  = "ck ";
1325    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1326
1327    /* open a regex */
1328    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1329    TEST_ASSERT_SUCCESS(theICUError);
1330
1331    /* set the input string */
1332    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1333    TEST_ASSERT_SUCCESS(theICUError);
1334
1335    /* split */
1336    /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1337     *  error occurs! */
1338    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1339        destFieldsSize, &theICUError);
1340
1341    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1342    TEST_ASSERT(wordCount==3);
1343
1344    if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1345    {
1346        theICUError = U_ZERO_ERROR;
1347        textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1348        wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1349            destFields, destFieldsSize, &theICUError);
1350        TEST_ASSERT(wordCount==3);
1351        TEST_ASSERT_SUCCESS(theICUError);
1352        TEST_ASSERT(neededLength1 == neededLength2);
1353        TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1354        TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1355        TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1356        TEST_ASSERT(destFields[3] == NULL);
1357        free(textBuff);
1358    }
1359    uregex_close(theRegEx);
1360}
1361
1362/* Based on TestRegexCAPI() */
1363static void TestUTextAPI(void) {
1364    UErrorCode           status = U_ZERO_ERROR;
1365    URegularExpression  *re;
1366    UText                patternText = UTEXT_INITIALIZER;
1367    UChar                pat[200];
1368    const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1369
    /* Minimalist open/close */
1371    utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1372    re = uregex_openUText(&patternText, 0, 0, &status);
1373    if (U_FAILURE(status)) {
1374         log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1375         utext_close(&patternText);
1376         return;
1377    }
1378    uregex_close(re);
1379
1380    /* Open with all flag values set */
1381    status = U_ZERO_ERROR;
1382    re = uregex_openUText(&patternText,
1383        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1384        0, &status);
1385    TEST_ASSERT_SUCCESS(status);
1386    uregex_close(re);
1387
1388    /* Open with an invalid flag */
1389    status = U_ZERO_ERROR;
1390    re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1391    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1392    uregex_close(re);
1393
1394    /* open with an invalid parameter */
1395    status = U_ZERO_ERROR;
1396    re = uregex_openUText(NULL,
1397        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1398    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1399
1400    /*
1401     *  clone
1402     */
1403    {
1404        URegularExpression *clone1;
1405        URegularExpression *clone2;
1406        URegularExpression *clone3;
1407        UChar  testString1[30];
1408        UChar  testString2[30];
1409        UBool  result;
1410
1411
1412        status = U_ZERO_ERROR;
1413        re = uregex_openUText(&patternText, 0, 0, &status);
1414        TEST_ASSERT_SUCCESS(status);
1415        clone1 = uregex_clone(re, &status);
1416        TEST_ASSERT_SUCCESS(status);
1417        TEST_ASSERT(clone1 != NULL);
1418
1419        status = U_ZERO_ERROR;
1420        clone2 = uregex_clone(re, &status);
1421        TEST_ASSERT_SUCCESS(status);
1422        TEST_ASSERT(clone2 != NULL);
1423        uregex_close(re);
1424
1425        status = U_ZERO_ERROR;
1426        clone3 = uregex_clone(clone2, &status);
1427        TEST_ASSERT_SUCCESS(status);
1428        TEST_ASSERT(clone3 != NULL);
1429
1430        u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
1431        u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
1432
1433        status = U_ZERO_ERROR;
1434        uregex_setText(clone1, testString1, -1, &status);
1435        TEST_ASSERT_SUCCESS(status);
1436        result = uregex_lookingAt(clone1, 0, &status);
1437        TEST_ASSERT_SUCCESS(status);
1438        TEST_ASSERT(result==TRUE);
1439
1440        status = U_ZERO_ERROR;
1441        uregex_setText(clone2, testString2, -1, &status);
1442        TEST_ASSERT_SUCCESS(status);
1443        result = uregex_lookingAt(clone2, 0, &status);
1444        TEST_ASSERT_SUCCESS(status);
1445        TEST_ASSERT(result==FALSE);
1446        result = uregex_find(clone2, 0, &status);
1447        TEST_ASSERT_SUCCESS(status);
1448        TEST_ASSERT(result==TRUE);
1449
1450        uregex_close(clone1);
1451        uregex_close(clone2);
1452        uregex_close(clone3);
1453
1454    }
1455
1456    /*
1457     *  pattern() and patternText()
1458     */
1459    {
1460        const UChar  *resultPat;
1461        int32_t       resultLen;
1462        UText        *resultText;
1463        const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1464        const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1465        u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
1466        status = U_ZERO_ERROR;
1467
1468        utext_openUTF8(&patternText, str_hello, -1, &status);
1469        re = uregex_open(pat, -1, 0, NULL, &status);
1470        resultPat = uregex_pattern(re, &resultLen, &status);
1471        TEST_ASSERT_SUCCESS(status);
1472
1473        /* The TEST_ASSERT_SUCCESS above should change too... */
1474        if (U_SUCCESS(status)) {
1475            TEST_ASSERT(resultLen == -1);
1476            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1477        }
1478
1479        resultText = uregex_patternUText(re, &status);
1480        TEST_ASSERT_SUCCESS(status);
1481        TEST_ASSERT_UTEXT(str_hello, resultText);
1482
1483        uregex_close(re);
1484
1485        status = U_ZERO_ERROR;
1486        re = uregex_open(pat, 3, 0, NULL, &status);
1487        resultPat = uregex_pattern(re, &resultLen, &status);
1488        TEST_ASSERT_SUCCESS(status);
1489
1490        /* The TEST_ASSERT_SUCCESS above should change too... */
1491        if (U_SUCCESS(status)) {
1492            TEST_ASSERT(resultLen == 3);
1493            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1494            TEST_ASSERT(u_strlen(resultPat) == 3);
1495        }
1496
1497        resultText = uregex_patternUText(re, &status);
1498        TEST_ASSERT_SUCCESS(status);
1499        TEST_ASSERT_UTEXT(str_hel, resultText);
1500
1501        uregex_close(re);
1502    }
1503
1504    /*
1505     *  setUText() and lookingAt()
1506     */
1507    {
1508        UText  text1 = UTEXT_INITIALIZER;
1509        UText  text2 = UTEXT_INITIALIZER;
1510        UBool  result;
1511        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1512        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1513        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1514        status = U_ZERO_ERROR;
1515        utext_openUTF8(&text1, str_abcccd, -1, &status);
1516        utext_openUTF8(&text2, str_abcccxd, -1, &status);
1517
1518        utext_openUTF8(&patternText, str_abcd, -1, &status);
1519        re = uregex_openUText(&patternText, 0, NULL, &status);
1520        TEST_ASSERT_SUCCESS(status);
1521
1522        /* Operation before doing a setText should fail... */
1523        status = U_ZERO_ERROR;
1524        uregex_lookingAt(re, 0, &status);
1525        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1526
1527        status = U_ZERO_ERROR;
1528        uregex_setUText(re, &text1, &status);
1529        result = uregex_lookingAt(re, 0, &status);
1530        TEST_ASSERT(result == TRUE);
1531        TEST_ASSERT_SUCCESS(status);
1532
1533        status = U_ZERO_ERROR;
1534        uregex_setUText(re, &text2, &status);
1535        result = uregex_lookingAt(re, 0, &status);
1536        TEST_ASSERT(result == FALSE);
1537        TEST_ASSERT_SUCCESS(status);
1538
1539        status = U_ZERO_ERROR;
1540        uregex_setUText(re, &text1, &status);
1541        result = uregex_lookingAt(re, 0, &status);
1542        TEST_ASSERT(result == TRUE);
1543        TEST_ASSERT_SUCCESS(status);
1544
1545        uregex_close(re);
1546        utext_close(&text1);
1547        utext_close(&text2);
1548    }
1549
1550
1551    /*
1552     *  getText() and getUText()
1553     */
1554    {
1555        UText  text1 = UTEXT_INITIALIZER;
1556        UText  text2 = UTEXT_INITIALIZER;
1557        UChar  text2Chars[20];
1558        UText  *resultText;
1559        const UChar   *result;
1560        int32_t  textLength;
1561        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1562        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1563        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1564
1565
1566        status = U_ZERO_ERROR;
1567        utext_openUTF8(&text1, str_abcccd, -1, &status);
1568        u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
1569        utext_openUChars(&text2, text2Chars, -1, &status);
1570
1571        utext_openUTF8(&patternText, str_abcd, -1, &status);
1572        re = uregex_openUText(&patternText, 0, NULL, &status);
1573
1574        /* First set a UText */
1575        uregex_setUText(re, &text1, &status);
1576        resultText = uregex_getUText(re, NULL, &status);
1577        TEST_ASSERT_SUCCESS(status);
1578        TEST_ASSERT(resultText != &text1);
1579        utext_setNativeIndex(resultText, 0);
1580        utext_setNativeIndex(&text1, 0);
1581        TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
1582        utext_close(resultText);
1583
1584        result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1585        TEST_ASSERT(textLength == -1 || textLength == 6);
1586        resultText = uregex_getUText(re, NULL, &status);
1587        TEST_ASSERT_SUCCESS(status);
1588        TEST_ASSERT(resultText != &text1);
1589        utext_setNativeIndex(resultText, 0);
1590        utext_setNativeIndex(&text1, 0);
1591        TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
1592        utext_close(resultText);
1593
1594        /* Then set a UChar * */
1595        uregex_setText(re, text2Chars, 7, &status);
1596        resultText = uregex_getUText(re, NULL, &status);
1597        TEST_ASSERT_SUCCESS(status);
1598        utext_setNativeIndex(resultText, 0);
1599        utext_setNativeIndex(&text2, 0);
1600        TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0);
1601        utext_close(resultText);
1602        result = uregex_getText(re, &textLength, &status);
1603        TEST_ASSERT(textLength == 7);
1604
1605        uregex_close(re);
1606        utext_close(&text1);
1607        utext_close(&text2);
1608    }
1609
1610    /*
1611     *  matches()
1612     */
1613    {
1614        UText   text1 = UTEXT_INITIALIZER;
1615        UBool   result;
1616        UText   nullText = UTEXT_INITIALIZER;
1617        const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1618        const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1619
1620        status = U_ZERO_ERROR;
1621        utext_openUTF8(&text1, str_abcccde, -1, &status);
1622        utext_openUTF8(&patternText, str_abcd, -1, &status);
1623        re = uregex_openUText(&patternText, 0, NULL, &status);
1624
1625        uregex_setUText(re, &text1, &status);
1626        result = uregex_matches(re, 0, &status);
1627        TEST_ASSERT(result == FALSE);
1628        TEST_ASSERT_SUCCESS(status);
1629        uregex_close(re);
1630
1631        status = U_ZERO_ERROR;
1632        re = uregex_openC(".?", 0, NULL, &status);
1633        uregex_setUText(re, &text1, &status);
1634        result = uregex_matches(re, 7, &status);
1635        TEST_ASSERT(result == TRUE);
1636        TEST_ASSERT_SUCCESS(status);
1637
1638        status = U_ZERO_ERROR;
1639        utext_openUTF8(&nullText, "", -1, &status);
1640        uregex_setUText(re, &nullText, &status);
1641        TEST_ASSERT_SUCCESS(status);
1642        result = uregex_matches(re, 0, &status);
1643        TEST_ASSERT(result == TRUE);
1644        TEST_ASSERT_SUCCESS(status);
1645
1646        uregex_close(re);
1647        utext_close(&text1);
1648        utext_close(&nullText);
1649    }
1650
1651
1652    /*
1653     *  lookingAt()    Used in setText test.
1654     */
1655
1656
1657    /*
1658     *  find(), findNext, start, end, reset
1659     */
1660    {
1661        UChar    text1[50];
1662        UBool    result;
1663        u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
1664        status = U_ZERO_ERROR;
1665        re = uregex_openC("rx", 0, NULL, &status);
1666
1667        uregex_setText(re, text1, -1, &status);
1668        result = uregex_find(re, 0, &status);
1669        TEST_ASSERT(result == TRUE);
1670        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1671        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1672        TEST_ASSERT_SUCCESS(status);
1673
1674        result = uregex_find(re, 9, &status);
1675        TEST_ASSERT(result == TRUE);
1676        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1677        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1678        TEST_ASSERT_SUCCESS(status);
1679
1680        result = uregex_find(re, 14, &status);
1681        TEST_ASSERT(result == FALSE);
1682        TEST_ASSERT_SUCCESS(status);
1683
1684        status = U_ZERO_ERROR;
1685        uregex_reset(re, 0, &status);
1686
1687        result = uregex_findNext(re, &status);
1688        TEST_ASSERT(result == TRUE);
1689        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1690        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1691        TEST_ASSERT_SUCCESS(status);
1692
1693        result = uregex_findNext(re, &status);
1694        TEST_ASSERT(result == TRUE);
1695        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1696        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1697        TEST_ASSERT_SUCCESS(status);
1698
1699        status = U_ZERO_ERROR;
1700        uregex_reset(re, 12, &status);
1701
1702        result = uregex_findNext(re, &status);
1703        TEST_ASSERT(result == TRUE);
1704        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1705        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1706        TEST_ASSERT_SUCCESS(status);
1707
1708        result = uregex_findNext(re, &status);
1709        TEST_ASSERT(result == FALSE);
1710        TEST_ASSERT_SUCCESS(status);
1711
1712        uregex_close(re);
1713    }
1714
1715    /*
1716     *  group()
1717     */
1718    {
1719        UChar    text1[80];
1720        UText   *actual;
1721        UBool    result;
1722
1723        const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
1724        const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
1725
1726
1727        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
1728
1729        status = U_ZERO_ERROR;
1730        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1731        TEST_ASSERT_SUCCESS(status);
1732
1733        uregex_setText(re, text1, -1, &status);
1734        result = uregex_find(re, 0, &status);
1735        TEST_ASSERT(result==TRUE);
1736
1737        /*  Capture Group 0, the full match.  Should succeed.  */
1738        status = U_ZERO_ERROR;
1739        actual = uregex_groupUTextDeep(re, 0, NULL, &status);
1740        TEST_ASSERT_SUCCESS(status);
1741        TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
1742        utext_close(actual);
1743
1744        /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1745        status = U_ZERO_ERROR;
1746        {
1747            int64_t      group_len;
1748            int32_t      len16;
1749            UErrorCode   shallowStatus = U_ZERO_ERROR;
1750            int64_t      nativeIndex;
1751            UChar *groupChars;
1752            UText groupText = UTEXT_INITIALIZER;
1753
1754            actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
1755            TEST_ASSERT_SUCCESS(status);
1756
1757            nativeIndex = utext_getNativeIndex(actual);
1758            /*  Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp]  */
1759            /*  len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus);  */
1760            len16 = (int32_t)group_len;
1761
1762            groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
1763            utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
1764
1765            utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
1766
1767            TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
1768            utext_close(&groupText);
1769            free(groupChars);
1770        }
1771        utext_close(actual);
1772
1773        /*  Capture group #1.  Should succeed. */
1774        status = U_ZERO_ERROR;
1775        actual = uregex_groupUTextDeep(re, 1, NULL, &status);
1776        TEST_ASSERT_SUCCESS(status);
1777        TEST_ASSERT_UTEXT(str_interior, actual);
1778        utext_close(actual);
1779
1780        /*  Capture group out of range.  Error. */
1781        status = U_ZERO_ERROR;
1782        actual = uregex_groupUTextDeep(re, 2, NULL, &status);
1783        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1784        TEST_ASSERT(utext_nativeLength(actual) == 0);
1785        utext_close(actual);
1786
1787        uregex_close(re);
1788
1789    }
1790
1791    /*
1792     *  replaceFirst()
1793     */
1794    {
1795        UChar    text1[80];
1796        UChar    text2[80];
1797        UText    replText = UTEXT_INITIALIZER;
1798        UText   *result;
1799        const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1800        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1801        const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
1802        const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1803        const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1804        status = U_ZERO_ERROR;
1805        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1806        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1807        utext_openUTF8(&replText, str_1x, -1, &status);
1808
1809        re = uregex_openC("x(.*?)x", 0, NULL, &status);
1810        TEST_ASSERT_SUCCESS(status);
1811
1812        /*  Normal case, with match */
1813        uregex_setText(re, text1, -1, &status);
1814        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1815        TEST_ASSERT_SUCCESS(status);
1816        TEST_ASSERT_UTEXT(str_Replxxx, result);
1817        utext_close(result);
1818
1819        /* No match.  Text should copy to output with no changes.  */
1820        uregex_setText(re, text2, -1, &status);
1821        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1822        TEST_ASSERT_SUCCESS(status);
1823        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1824        utext_close(result);
1825
1826        /* Unicode escapes */
1827        uregex_setText(re, text1, -1, &status);
1828        utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1829        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1830        TEST_ASSERT_SUCCESS(status);
1831        TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1832        utext_close(result);
1833
1834        uregex_close(re);
1835        utext_close(&replText);
1836    }
1837
1838
1839    /*
1840     *  replaceAll()
1841     */
1842    {
1843        UChar    text1[80];
1844        UChar    text2[80];
1845        UText    replText = UTEXT_INITIALIZER;
1846        UText   *result;
1847        const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1848        const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1849        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1850        status = U_ZERO_ERROR;
1851        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1852        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1853        utext_openUTF8(&replText, str_1, -1, &status);
1854
1855        re = uregex_openC("x(.*?)x", 0, NULL, &status);
1856        TEST_ASSERT_SUCCESS(status);
1857
1858        /*  Normal case, with match */
1859        uregex_setText(re, text1, -1, &status);
1860        result = uregex_replaceAllUText(re, &replText, NULL, &status);
1861        TEST_ASSERT_SUCCESS(status);
1862        TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1863        utext_close(result);
1864
1865        /* No match.  Text should copy to output with no changes.  */
1866        uregex_setText(re, text2, -1, &status);
1867        result = uregex_replaceAllUText(re, &replText, NULL, &status);
1868        TEST_ASSERT_SUCCESS(status);
1869        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1870        utext_close(result);
1871
1872        uregex_close(re);
1873        utext_close(&replText);
1874    }
1875
1876
1877    /*
1878     *  appendReplacement()
1879     */
1880    {
1881        UChar    text[100];
1882        UChar    repl[100];
1883        UChar    buf[100];
1884        UChar   *bufPtr;
1885        int32_t  bufCap;
1886
1887        status = U_ZERO_ERROR;
1888        re = uregex_openC(".*", 0, 0, &status);
1889        TEST_ASSERT_SUCCESS(status);
1890
1891        u_uastrncpy(text, "whatever",  sizeof(text)/2);
1892        u_uastrncpy(repl, "some other", sizeof(repl)/2);
1893        uregex_setText(re, text, -1, &status);
1894
1895        /* match covers whole target string */
1896        uregex_find(re, 0, &status);
1897        TEST_ASSERT_SUCCESS(status);
1898        bufPtr = buf;
1899        bufCap = sizeof(buf) / 2;
1900        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1901        TEST_ASSERT_SUCCESS(status);
1902        TEST_ASSERT_STRING("some other", buf, TRUE);
1903
1904        /* Match has \u \U escapes */
1905        uregex_find(re, 0, &status);
1906        TEST_ASSERT_SUCCESS(status);
1907        bufPtr = buf;
1908        bufCap = sizeof(buf) / 2;
1909        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1910        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1911        TEST_ASSERT_SUCCESS(status);
1912        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1913
1914        uregex_close(re);
1915    }
1916
1917
1918    /*
1919     *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1920     */
1921
1922    /*
1923     *  splitUText()
1924     */
1925    {
1926        UChar    textToSplit[80];
1927        UChar    text2[80];
1928        UText    *fields[10];
1929        int32_t  numFields;
1930        int32_t i;
1931
1932        u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1933        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1934
1935        status = U_ZERO_ERROR;
1936        re = uregex_openC(":", 0, NULL, &status);
1937
1938
1939        /*  Simple split */
1940
1941        uregex_setText(re, textToSplit, -1, &status);
1942        TEST_ASSERT_SUCCESS(status);
1943
1944        /* The TEST_ASSERT_SUCCESS call above should change too... */
1945        if (U_SUCCESS(status)) {
1946            memset(fields, 0, sizeof(fields));
1947            numFields = uregex_splitUText(re, fields, 10, &status);
1948            TEST_ASSERT_SUCCESS(status);
1949
1950            /* The TEST_ASSERT_SUCCESS call above should change too... */
1951            if(U_SUCCESS(status)) {
1952              const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1953              const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1954              const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1955                TEST_ASSERT(numFields == 3);
1956                TEST_ASSERT_UTEXT(str_first,  fields[0]);
1957                TEST_ASSERT_UTEXT(str_second, fields[1]);
1958                TEST_ASSERT_UTEXT(str_third, fields[2]);
1959                TEST_ASSERT(fields[3] == NULL);
1960            }
1961            for(i = 0; i < numFields; i++) {
1962                utext_close(fields[i]);
1963            }
1964        }
1965
1966        uregex_close(re);
1967
1968
1969        /*  Split with too few output strings available */
1970        status = U_ZERO_ERROR;
1971        re = uregex_openC(":", 0, NULL, &status);
1972        uregex_setText(re, textToSplit, -1, &status);
1973        TEST_ASSERT_SUCCESS(status);
1974
1975        /* The TEST_ASSERT_SUCCESS call above should change too... */
1976        if(U_SUCCESS(status)) {
1977            fields[0] = NULL;
1978            fields[1] = NULL;
1979            fields[2] = &patternText;
1980            numFields = uregex_splitUText(re, fields, 2, &status);
1981            TEST_ASSERT_SUCCESS(status);
1982
1983            /* The TEST_ASSERT_SUCCESS call above should change too... */
1984            if(U_SUCCESS(status)) {
1985                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
1986                const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
1987                TEST_ASSERT(numFields == 2);
1988                TEST_ASSERT_UTEXT(str_first,  fields[0]);
1989                TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
1990                TEST_ASSERT(fields[2] == &patternText);
1991            }
1992            for(i = 0; i < numFields; i++) {
1993                utext_close(fields[i]);
1994            }
1995        }
1996
1997        uregex_close(re);
1998    }
1999
2000    /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2001     *                   comes out as additional fields.  */
2002    {
2003        UChar    textToSplit[80];
2004        UText    *fields[10];
2005        int32_t  numFields;
2006        int32_t i;
2007
2008        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
2009
2010        status = U_ZERO_ERROR;
2011        re = uregex_openC("<(.*?)>", 0, NULL, &status);
2012
2013        uregex_setText(re, textToSplit, -1, &status);
2014        TEST_ASSERT_SUCCESS(status);
2015
2016        /* The TEST_ASSERT_SUCCESS call above should change too... */
2017        if(U_SUCCESS(status)) {
2018            memset(fields, 0, sizeof(fields));
2019            numFields = uregex_splitUText(re, fields, 10, &status);
2020            TEST_ASSERT_SUCCESS(status);
2021
2022            /* The TEST_ASSERT_SUCCESS call above should change too... */
2023            if(U_SUCCESS(status)) {
2024                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2025                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2026                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2027                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2028                const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2029
2030                TEST_ASSERT(numFields == 5);
2031                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2032                TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2033                TEST_ASSERT_UTEXT(str_second, fields[2]);
2034                TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2035                TEST_ASSERT_UTEXT(str_third, fields[4]);
2036                TEST_ASSERT(fields[5] == NULL);
2037            }
2038            for(i = 0; i < numFields; i++) {
2039                utext_close(fields[i]);
2040            }
2041        }
2042
2043        /*  Split with too few output strings available (2) */
2044        status = U_ZERO_ERROR;
2045        fields[0] = NULL;
2046        fields[1] = NULL;
2047        fields[2] = &patternText;
2048        numFields = uregex_splitUText(re, fields, 2, &status);
2049        TEST_ASSERT_SUCCESS(status);
2050
2051        /* The TEST_ASSERT_SUCCESS call above should change too... */
2052        if(U_SUCCESS(status)) {
2053            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2054            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2055            TEST_ASSERT(numFields == 2);
2056            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2057            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2058            TEST_ASSERT(fields[2] == &patternText);
2059        }
2060        for(i = 0; i < numFields; i++) {
2061            utext_close(fields[i]);
2062        }
2063
2064
2065        /*  Split with too few output strings available (3) */
2066        status = U_ZERO_ERROR;
2067        fields[0] = NULL;
2068        fields[1] = NULL;
2069        fields[2] = NULL;
2070        fields[3] = &patternText;
2071        numFields = uregex_splitUText(re, fields, 3, &status);
2072        TEST_ASSERT_SUCCESS(status);
2073
2074        /* The TEST_ASSERT_SUCCESS call above should change too... */
2075        if(U_SUCCESS(status)) {
2076            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2077            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2078            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2079            TEST_ASSERT(numFields == 3);
2080            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2081            TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2082            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2083            TEST_ASSERT(fields[3] == &patternText);
2084        }
2085        for(i = 0; i < numFields; i++) {
2086            utext_close(fields[i]);
2087        }
2088
2089        /*  Split with just enough output strings available (5) */
2090        status = U_ZERO_ERROR;
2091        fields[0] = NULL;
2092        fields[1] = NULL;
2093        fields[2] = NULL;
2094        fields[3] = NULL;
2095        fields[4] = NULL;
2096        fields[5] = &patternText;
2097        numFields = uregex_splitUText(re, fields, 5, &status);
2098        TEST_ASSERT_SUCCESS(status);
2099
2100        /* The TEST_ASSERT_SUCCESS call above should change too... */
2101        if(U_SUCCESS(status)) {
2102            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2103            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2104            const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2105            const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2106            const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2107
2108            TEST_ASSERT(numFields == 5);
2109            TEST_ASSERT_UTEXT(str_first,  fields[0]);
2110            TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2111            TEST_ASSERT_UTEXT(str_second, fields[2]);
2112            TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2113            TEST_ASSERT_UTEXT(str_third, fields[4]);
2114            TEST_ASSERT(fields[5] == &patternText);
2115        }
2116        for(i = 0; i < numFields; i++) {
2117            utext_close(fields[i]);
2118        }
2119
2120        /* Split, end of text is a field delimiter.   */
2121        status = U_ZERO_ERROR;
2122        uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2123        TEST_ASSERT_SUCCESS(status);
2124
2125        /* The TEST_ASSERT_SUCCESS call above should change too... */
2126        if(U_SUCCESS(status)) {
2127            memset(fields, 0, sizeof(fields));
2128            fields[9] = &patternText;
2129            numFields = uregex_splitUText(re, fields, 9, &status);
2130            TEST_ASSERT_SUCCESS(status);
2131
2132            /* The TEST_ASSERT_SUCCESS call above should change too... */
2133            if(U_SUCCESS(status)) {
2134                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2135                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2136                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2137                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2138                const char str_empty[] = { 0x00 };
2139
2140                TEST_ASSERT(numFields == 5);
2141                TEST_ASSERT_UTEXT(str_first,  fields[0]);
2142                TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2143                TEST_ASSERT_UTEXT(str_second, fields[2]);
2144                TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2145                TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2146                TEST_ASSERT(fields[5] == NULL);
2147                TEST_ASSERT(fields[8] == NULL);
2148                TEST_ASSERT(fields[9] == &patternText);
2149            }
2150            for(i = 0; i < numFields; i++) {
2151                utext_close(fields[i]);
2152            }
2153        }
2154
2155        uregex_close(re);
2156    }
2157    utext_close(&patternText);
2158}
2159
2160
2161static void TestRefreshInput(void) {
2162    /*
2163     *  RefreshInput changes out the input of a URegularExpression without
2164     *    changing anything else in the match state.  Used with Java JNI,
2165     *    when Java moves the underlying string storage.   This test
2166     *    runs a find() loop, moving the text after the first match.
2167     *    The right number of matches should still be found.
2168     */
2169    UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2170    UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2171    UErrorCode status = U_ZERO_ERROR;
2172    URegularExpression *re;
2173    UText ut1 = UTEXT_INITIALIZER;
2174    UText ut2 = UTEXT_INITIALIZER;
2175
2176    re = uregex_openC("[ABC]", 0, 0, &status);
2177    TEST_ASSERT_SUCCESS(status);
2178
2179    utext_openUChars(&ut1, testStr, -1, &status);
2180    TEST_ASSERT_SUCCESS(status);
2181    uregex_setUText(re, &ut1, &status);
2182    TEST_ASSERT_SUCCESS(status);
2183
2184    /* Find the first match "A" in the original string */
2185    TEST_ASSERT(uregex_findNext(re, &status));
2186    TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2187
2188    /* Move the string, kill the original string.  */
2189    u_strcpy(movedStr, testStr);
2190    u_memset(testStr, 0, u_strlen(testStr));
2191    utext_openUChars(&ut2, movedStr, -1, &status);
2192    TEST_ASSERT_SUCCESS(status);
2193    uregex_refreshUText(re, &ut2, &status);
2194    TEST_ASSERT_SUCCESS(status);
2195
2196    /* Find the following two matches, now working in the moved string. */
2197    TEST_ASSERT(uregex_findNext(re, &status));
2198    TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2199    TEST_ASSERT(uregex_findNext(re, &status));
2200    TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2201    TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2202
2203    uregex_close(re);
2204}
2205
2206
2207static void TestBug8421(void) {
2208    /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2209     *             was failing.
2210     */
2211    URegularExpression *re;
2212    UErrorCode status = U_ZERO_ERROR;
2213    int32_t  limit = -1;
2214
2215    re = uregex_openC("abc", 0, 0, &status);
2216    TEST_ASSERT_SUCCESS(status);
2217
2218    limit = uregex_getTimeLimit(re, &status);
2219    TEST_ASSERT_SUCCESS(status);
2220    TEST_ASSERT(limit == 0);
2221
2222    uregex_setTimeLimit(re, 100, &status);
2223    TEST_ASSERT_SUCCESS(status);
2224    limit = uregex_getTimeLimit(re, &status);
2225    TEST_ASSERT_SUCCESS(status);
2226    TEST_ASSERT(limit == 100);
2227
2228    uregex_close(re);
2229}
2230
2231
2232#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2233