1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2004-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File reapits.c
9*
10*********************************************************************************/
11/*C API TEST FOR Regular Expressions */
12/**
13*   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
14*   try to test the full functionality.  It just calls each function and verifies that it
15*   works on a basic level.
16*
17*   More complete testing of regular expression functionality is done with the C++ tests.
18**/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_REGULAR_EXPRESSIONS
23
24#include <stdlib.h>
25#include <string.h>
26#include "unicode/uloc.h"
27#include "unicode/uregex.h"
28#include "unicode/ustring.h"
29#include "cintltst.h"
30
/*
 * TEST_ASSERT_SUCCESS(status)
 *    If status is a failure code (U_FAILURE), call log_data_err with the
 *    current file name, line number and the error's name.  The macro does not
 *    return or jump, so the code that follows it still runs.
 *    The expansion is a plain { } block, so an invocation is valid with or
 *    without a trailing semicolon.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
33
/*
 * TEST_ASSERT(expr)
 *    If expr compares equal to FALSE, call log_data_err with the current file
 *    name and line number.  The macro does not return or jump, so the code
 *    that follows it still runs.
 *    The expansion is a plain { } block, so an invocation is valid with or
 *    without a trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
36
37/*
38 *   TEST_SETUP and TEST_TEARDOWN
39 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
41 *              pattern:     The regex pattern, a (char *) null terminated C string.
42 *              testString:  The string data, also a (char *) C string.
43 *              flags:       Regex flags to set when compiling the pattern
44 *
45 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
47 */
/*
 *   TEST_SETUP opens an outer block plus an inner "if (U_SUCCESS(status))"
 *   block; TEST_TEARDOWN closes both, so the two macros must always be used
 *   as a pair.  Both expect variables named "status" (UErrorCode) and "re"
 *   (URegularExpression *) to be declared in the enclosing scope.
 *
 *   srcString is a UChar copy of testString.  It is allocated by TEST_SETUP
 *   and released by TEST_TEARDOWN.  If the allocation fails, nothing is
 *   written through the NULL pointer: status is set to
 *   U_MEMORY_ALLOCATION_ERROR (unless it already holds a failure from
 *   uregex_openC), the failure is logged, and the test body is skipped.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString,  strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
65
66
67static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
68     char     buf_inside_macro[120];
69     int32_t  len = (int32_t)strlen(expected);
70     UBool    success;
71     if (nulTerm) {
72         u_austrncpy(buf_inside_macro, (actual), len+1);
73         buf_inside_macro[len+2] = 0;
74         success = (strcmp((expected), buf_inside_macro) == 0);
75     } else {
76         u_austrncpy(buf_inside_macro, (actual), len);
77         buf_inside_macro[len+1] = 0;
78         success = (strncmp((expected), buf_inside_macro, len) == 0);
79     }
80     if (success == FALSE) {
81         log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
82             file, line, (expected), buf_inside_macro);
83     }
84}
85
/*
 * TEST_ASSERT_STRING(expected, actual, nulTerm)
 *    Forwards to test_assert_string(), adding the file name and line number of
 *    the invocation so that a failure message points at the calling test.
 */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) \
    test_assert_string((expected), (actual), (nulTerm), __FILE__, __LINE__)
87
88
89
90
91
92static void TestRegexCAPI(void);
93static void TestBug4315(void);
94
95void addURegexTest(TestNode** root);
96
97void addURegexTest(TestNode** root)
98{
99    addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
100    addTest(root, &TestBug4315,   "regex/TestBug4315");
101}
102
103/*
104 * Call back function and context struct used for testing
105 *    regular expression user callbacks.  This test is mostly the same as
106 *   the corresponding C++ test in intltest.
107 */
/* State shared between a test and TestCallbackFn through the callback's context pointer. */
typedef struct callBackContext {
    int32_t  maxCalls;    /* The callback returns TRUE only while numCalls is below this value. */
    int32_t  numCalls;    /* Incremented by one on every invocation of the callback.            */
    int32_t  lastSteps;   /* The steps argument seen by the previous invocation of the callback. */
} callBackContext;
113
114static UBool U_EXPORT2 U_CALLCONV
115TestCallbackFn(const void *context, int32_t steps) {
116  callBackContext  *info = (callBackContext *)context;
117  if (info->lastSteps+1 != steps) {
118      log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
119  }
120  info->lastSteps = steps;
121  info->numCalls++;
122  return (info->numCalls < info->maxCalls);
123}
124
125/*
126 *   Regular Expression C API Tests
127 */
128static void TestRegexCAPI(void) {
129    UErrorCode           status = U_ZERO_ERROR;
130    URegularExpression  *re;
131    UChar                pat[200];
132    UChar               *minus1;
133
134    memset(&minus1, -1, sizeof(minus1));
135
    /* Minimalist open/close */
137    u_uastrncpy(pat, "abc*", sizeof(pat)/2);
138    re = uregex_open(pat, -1, 0, 0, &status);
139    if (U_FAILURE(status)) {
140         log_data_err("Failed to open regular expression, line %d, error is \"%s\" (Are you missing data?)\n", __LINE__, u_errorName(status));
141         return;
142    }
143    uregex_close(re);
144
145    /* Open with all flag values set */
146    status = U_ZERO_ERROR;
147    re = uregex_open(pat, -1,
148        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
149        0, &status);
150    TEST_ASSERT_SUCCESS(status);
151    uregex_close(re);
152
153    /* Open with an invalid flag */
154    status = U_ZERO_ERROR;
155    re = uregex_open(pat, -1, 0x40000000, 0, &status);
156    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
157    uregex_close(re);
158
159    /* openC with an invalid parameter */
160    status = U_ZERO_ERROR;
161    re = uregex_openC(NULL,
162        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
163    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
164
165    /* openC with an invalid parameter */
166    status = U_USELESS_COLLATOR_ERROR;
167    re = uregex_openC(NULL,
168        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
169    TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
170
171    /* openC   open from a C string */
172    {
173        const UChar   *p;
174        int32_t  len;
175        status = U_ZERO_ERROR;
176        re = uregex_openC("abc*", 0, 0, &status);
177        TEST_ASSERT_SUCCESS(status);
178        p = uregex_pattern(re, &len, &status);
179        TEST_ASSERT_SUCCESS(status);
180
181        /* The TEST_ASSERT_SUCCESS above should change too... */
182        if(U_SUCCESS(status)) {
183            u_uastrncpy(pat, "abc*", sizeof(pat)/2);
184            TEST_ASSERT(u_strcmp(pat, p) == 0);
185            TEST_ASSERT(len==(int32_t)strlen("abc*"));
186        }
187
188        uregex_close(re);
189
190        /*  TODO:  Open with ParseError parameter */
191    }
192
193    /*
194     *  clone
195     */
196    {
197        URegularExpression *clone1;
198        URegularExpression *clone2;
199        URegularExpression *clone3;
200        UChar  testString1[30];
201        UChar  testString2[30];
202        UBool  result;
203
204
205        status = U_ZERO_ERROR;
206        re = uregex_openC("abc*", 0, 0, &status);
207        TEST_ASSERT_SUCCESS(status);
208        clone1 = uregex_clone(re, &status);
209        TEST_ASSERT_SUCCESS(status);
210        TEST_ASSERT(clone1 != NULL);
211
212        status = U_ZERO_ERROR;
213        clone2 = uregex_clone(re, &status);
214        TEST_ASSERT_SUCCESS(status);
215        TEST_ASSERT(clone2 != NULL);
216        uregex_close(re);
217
218        status = U_ZERO_ERROR;
219        clone3 = uregex_clone(clone2, &status);
220        TEST_ASSERT_SUCCESS(status);
221        TEST_ASSERT(clone3 != NULL);
222
223        u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
224        u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
225
226        status = U_ZERO_ERROR;
227        uregex_setText(clone1, testString1, -1, &status);
228        TEST_ASSERT_SUCCESS(status);
229        result = uregex_lookingAt(clone1, 0, &status);
230        TEST_ASSERT_SUCCESS(status);
231        TEST_ASSERT(result==TRUE);
232
233        status = U_ZERO_ERROR;
234        uregex_setText(clone2, testString2, -1, &status);
235        TEST_ASSERT_SUCCESS(status);
236        result = uregex_lookingAt(clone2, 0, &status);
237        TEST_ASSERT_SUCCESS(status);
238        TEST_ASSERT(result==FALSE);
239        result = uregex_find(clone2, 0, &status);
240        TEST_ASSERT_SUCCESS(status);
241        TEST_ASSERT(result==TRUE);
242
243        uregex_close(clone1);
244        uregex_close(clone2);
245        uregex_close(clone3);
246
247    }
248
249    /*
250     *  pattern()
251    */
252    {
253        const UChar  *resultPat;
254        int32_t       resultLen;
255        u_uastrncpy(pat, "hello", sizeof(pat)/2);
256        status = U_ZERO_ERROR;
257        re = uregex_open(pat, -1, 0, NULL, &status);
258        resultPat = uregex_pattern(re, &resultLen, &status);
259        TEST_ASSERT_SUCCESS(status);
260
261        /* The TEST_ASSERT_SUCCESS above should change too... */
262        if (U_SUCCESS(status)) {
263            TEST_ASSERT(resultLen == -1);
264            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
265        }
266
267        uregex_close(re);
268
269        status = U_ZERO_ERROR;
270        re = uregex_open(pat, 3, 0, NULL, &status);
271        resultPat = uregex_pattern(re, &resultLen, &status);
272        TEST_ASSERT_SUCCESS(status);
273        TEST_ASSERT_SUCCESS(status);
274
275        /* The TEST_ASSERT_SUCCESS above should change too... */
276        if (U_SUCCESS(status)) {
277            TEST_ASSERT(resultLen == 3);
278            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
279            TEST_ASSERT(u_strlen(resultPat) == 3);
280        }
281
282        uregex_close(re);
283    }
284
285    /*
286     *  flags()
287     */
288    {
289        int32_t  t;
290
291        status = U_ZERO_ERROR;
292        re = uregex_open(pat, -1, 0, NULL, &status);
293        t  = uregex_flags(re, &status);
294        TEST_ASSERT_SUCCESS(status);
295        TEST_ASSERT(t == 0);
296        uregex_close(re);
297
298        status = U_ZERO_ERROR;
299        re = uregex_open(pat, -1, 0, NULL, &status);
300        t  = uregex_flags(re, &status);
301        TEST_ASSERT_SUCCESS(status);
302        TEST_ASSERT(t == 0);
303        uregex_close(re);
304
305        status = U_ZERO_ERROR;
306        re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
307        t  = uregex_flags(re, &status);
308        TEST_ASSERT_SUCCESS(status);
309        TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
310        uregex_close(re);
311    }
312
313    /*
314     *  setText() and lookingAt()
315     */
316    {
317        UChar  text1[50];
318        UChar  text2[50];
319        UBool  result;
320
321        u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
322        u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
323        status = U_ZERO_ERROR;
324        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
325        re = uregex_open(pat, -1, 0, NULL, &status);
326        TEST_ASSERT_SUCCESS(status);
327
328        /* Operation before doing a setText should fail... */
329        status = U_ZERO_ERROR;
330        uregex_lookingAt(re, 0, &status);
331        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
332
333        status = U_ZERO_ERROR;
334        uregex_setText(re, text1, -1, &status);
335        result = uregex_lookingAt(re, 0, &status);
336        TEST_ASSERT(result == TRUE);
337        TEST_ASSERT_SUCCESS(status);
338
339        status = U_ZERO_ERROR;
340        uregex_setText(re, text2, -1, &status);
341        result = uregex_lookingAt(re, 0, &status);
342        TEST_ASSERT(result == FALSE);
343        TEST_ASSERT_SUCCESS(status);
344
345        status = U_ZERO_ERROR;
346        uregex_setText(re, text1, -1, &status);
347        result = uregex_lookingAt(re, 0, &status);
348        TEST_ASSERT(result == TRUE);
349        TEST_ASSERT_SUCCESS(status);
350
351        status = U_ZERO_ERROR;
352        uregex_setText(re, text1, 5, &status);
353        result = uregex_lookingAt(re, 0, &status);
354        TEST_ASSERT(result == FALSE);
355        TEST_ASSERT_SUCCESS(status);
356
357        status = U_ZERO_ERROR;
358        uregex_setText(re, text1, 6, &status);
359        result = uregex_lookingAt(re, 0, &status);
360        TEST_ASSERT(result == TRUE);
361        TEST_ASSERT_SUCCESS(status);
362
363        uregex_close(re);
364    }
365
366
367    /*
368     *  getText()
369     */
370    {
371        UChar    text1[50];
372        UChar    text2[50];
373        const UChar   *result;
374        int32_t  textLength;
375
376        u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
377        u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
378        status = U_ZERO_ERROR;
379        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
380        re = uregex_open(pat, -1, 0, NULL, &status);
381
382        uregex_setText(re, text1, -1, &status);
383        result = uregex_getText(re, &textLength, &status);
384        TEST_ASSERT(result == text1);
385        TEST_ASSERT(textLength == -1);
386        TEST_ASSERT_SUCCESS(status);
387
388        status = U_ZERO_ERROR;
389        uregex_setText(re, text2, 7, &status);
390        result = uregex_getText(re, &textLength, &status);
391        TEST_ASSERT(result == text2);
392        TEST_ASSERT(textLength == 7);
393        TEST_ASSERT_SUCCESS(status);
394
395        status = U_ZERO_ERROR;
396        uregex_setText(re, text2, 4, &status);
397        result = uregex_getText(re, &textLength, &status);
398        TEST_ASSERT(result == text2);
399        TEST_ASSERT(textLength == 4);
400        TEST_ASSERT_SUCCESS(status);
401        uregex_close(re);
402    }
403
404    /*
405     *  matches()
406     */
407    {
408        UChar   text1[50];
409        UBool   result;
410        int     len;
411        UChar   nullString[] = {0,0,0};
412
413        u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
414        status = U_ZERO_ERROR;
415        u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
416        re = uregex_open(pat, -1, 0, NULL, &status);
417
418        uregex_setText(re, text1, -1, &status);
419        result = uregex_matches(re, 0, &status);
420        TEST_ASSERT(result == FALSE);
421        TEST_ASSERT_SUCCESS(status);
422
423        status = U_ZERO_ERROR;
424        uregex_setText(re, text1, 6, &status);
425        result = uregex_matches(re, 0, &status);
426        TEST_ASSERT(result == TRUE);
427        TEST_ASSERT_SUCCESS(status);
428
429        status = U_ZERO_ERROR;
430        uregex_setText(re, text1, 6, &status);
431        result = uregex_matches(re, 1, &status);
432        TEST_ASSERT(result == FALSE);
433        TEST_ASSERT_SUCCESS(status);
434        uregex_close(re);
435
436        status = U_ZERO_ERROR;
437        re = uregex_openC(".?", 0, NULL, &status);
438        uregex_setText(re, text1, -1, &status);
439        len = u_strlen(text1);
440        result = uregex_matches(re, len, &status);
441        TEST_ASSERT(result == TRUE);
442        TEST_ASSERT_SUCCESS(status);
443
444        status = U_ZERO_ERROR;
445        uregex_setText(re, nullString, -1, &status);
446        TEST_ASSERT_SUCCESS(status);
447        result = uregex_matches(re, 0, &status);
448        TEST_ASSERT(result == TRUE);
449        TEST_ASSERT_SUCCESS(status);
450        uregex_close(re);
451    }
452
453
454    /*
455     *  lookingAt()    Used in setText test.
456     */
457
458
459    /*
460     *  find(), findNext, start, end, reset
461     */
462    {
463        UChar    text1[50];
464        UBool    result;
465        u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
466        status = U_ZERO_ERROR;
467        re = uregex_openC("rx", 0, NULL, &status);
468
469        uregex_setText(re, text1, -1, &status);
470        result = uregex_find(re, 0, &status);
471        TEST_ASSERT(result == TRUE);
472        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
473        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
474        TEST_ASSERT_SUCCESS(status);
475
476        result = uregex_find(re, 9, &status);
477        TEST_ASSERT(result == TRUE);
478        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
479        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
480        TEST_ASSERT_SUCCESS(status);
481
482        result = uregex_find(re, 14, &status);
483        TEST_ASSERT(result == FALSE);
484        TEST_ASSERT_SUCCESS(status);
485
486        status = U_ZERO_ERROR;
487        uregex_reset(re, 0, &status);
488
489        result = uregex_findNext(re, &status);
490        TEST_ASSERT(result == TRUE);
491        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
492        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
493        TEST_ASSERT_SUCCESS(status);
494
495        result = uregex_findNext(re, &status);
496        TEST_ASSERT(result == TRUE);
497        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
498        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
499        TEST_ASSERT_SUCCESS(status);
500
501        status = U_ZERO_ERROR;
502        uregex_reset(re, 12, &status);
503
504        result = uregex_findNext(re, &status);
505        TEST_ASSERT(result == TRUE);
506        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
507        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
508        TEST_ASSERT_SUCCESS(status);
509
510        result = uregex_findNext(re, &status);
511        TEST_ASSERT(result == FALSE);
512        TEST_ASSERT_SUCCESS(status);
513
514        uregex_close(re);
515    }
516
517    /*
518     *  groupCount
519     */
520    {
521        int32_t result;
522
523        status = U_ZERO_ERROR;
524        re = uregex_openC("abc", 0, NULL, &status);
525        result = uregex_groupCount(re, &status);
526        TEST_ASSERT_SUCCESS(status);
527        TEST_ASSERT(result == 0);
528        uregex_close(re);
529
530        status = U_ZERO_ERROR;
531        re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
532        result = uregex_groupCount(re, &status);
533        TEST_ASSERT_SUCCESS(status);
534        TEST_ASSERT(result == 3);
535        uregex_close(re);
536
537    }
538
539
540    /*
541     *  group()
542     */
543    {
544        UChar    text1[80];
545        UChar    buf[80];
546        UBool    result;
547        int32_t  resultSz;
548        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
549
550        status = U_ZERO_ERROR;
551        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
552        TEST_ASSERT_SUCCESS(status);
553
554
555        uregex_setText(re, text1, -1, &status);
556        result = uregex_find(re, 0, &status);
557        TEST_ASSERT(result==TRUE);
558
559        /*  Capture Group 0, the full match.  Should succeed.  */
560        status = U_ZERO_ERROR;
561        resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
562        TEST_ASSERT_SUCCESS(status);
563        TEST_ASSERT_STRING("abc interior def", buf, TRUE);
564        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
565
566        /*  Capture group #1.  Should succeed. */
567        status = U_ZERO_ERROR;
568        resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
569        TEST_ASSERT_SUCCESS(status);
570        TEST_ASSERT_STRING(" interior ", buf, TRUE);
571        TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
572
573        /*  Capture group out of range.  Error. */
574        status = U_ZERO_ERROR;
575        uregex_group(re, 2, buf, sizeof(buf)/2, &status);
576        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
577
578        /* NULL buffer, pure pre-flight */
579        status = U_ZERO_ERROR;
580        resultSz = uregex_group(re, 0, NULL, 0, &status);
581        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
582        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
583
584        /* Too small buffer, truncated string */
585        status = U_ZERO_ERROR;
586        memset(buf, -1, sizeof(buf));
587        resultSz = uregex_group(re, 0, buf, 5, &status);
588        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
589        TEST_ASSERT_STRING("abc i", buf, FALSE);
590        TEST_ASSERT(buf[5] == (UChar)0xffff);
591        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
592
593        /* Output string just fits buffer, no NUL term. */
594        status = U_ZERO_ERROR;
595        resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
596        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
597        TEST_ASSERT_STRING("abc interior def", buf, FALSE);
598        TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
599        TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
600
601        uregex_close(re);
602
603    }
604
605    /*
606     *  Regions
607     */
608
609
610        /* SetRegion(), getRegion() do something  */
611        TEST_SETUP(".*", "0123456789ABCDEF", 0)
612        UChar resultString[40];
613        TEST_ASSERT(uregex_regionStart(re, &status) == 0);
614        TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
615        uregex_setRegion(re, 3, 6, &status);
616        TEST_ASSERT(uregex_regionStart(re, &status) == 3);
617        TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
618        TEST_ASSERT(uregex_findNext(re, &status));
619        TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
620        TEST_ASSERT_STRING("345", resultString, TRUE);
621        TEST_TEARDOWN;
622
623        /* find(start=-1) uses regions   */
624        TEST_SETUP(".*", "0123456789ABCDEF", 0);
625        uregex_setRegion(re, 4, 6, &status);
626        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
627        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
628        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
629        TEST_TEARDOWN;
630
631        /* find (start >=0) does not use regions   */
632        TEST_SETUP(".*", "0123456789ABCDEF", 0);
633        uregex_setRegion(re, 4, 6, &status);
634        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
635        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
636        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
637        TEST_TEARDOWN;
638
639        /* findNext() obeys regions    */
640        TEST_SETUP(".", "0123456789ABCDEF", 0);
641        uregex_setRegion(re, 4, 6, &status);
642        TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
643        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
644        TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
645        TEST_ASSERT(uregex_start(re, 0, &status) == 5);
646        TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
647        TEST_TEARDOWN;
648
649        /* matches(start=-1) uses regions                                           */
650        /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
651        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
652        uregex_setRegion(re, 4, 6, &status);
653        TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
654        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
655        TEST_ASSERT(uregex_end(re, 0, &status) == 6);
656        TEST_TEARDOWN;
657
658        /* matches (start >=0) does not use regions       */
659        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
660        uregex_setRegion(re, 4, 6, &status);
661        TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
662        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
663        TEST_ASSERT(uregex_end(re, 0, &status) == 16);
664        TEST_TEARDOWN;
665
666        /* lookingAt(start=-1) uses regions                                         */
667        /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
668        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
669        uregex_setRegion(re, 4, 6, &status);
670        TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
671        TEST_ASSERT(uregex_start(re, 0, &status) == 4);
672        TEST_ASSERT(uregex_end(re, 0, &status) == 4);
673        TEST_TEARDOWN;
674
675        /* lookingAt (start >=0) does not use regions  */
676        TEST_SETUP(".*?", "0123456789ABCDEF", 0);
677        uregex_setRegion(re, 4, 6, &status);
678        TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
679        TEST_ASSERT(uregex_start(re, 0, &status) == 0);
680        TEST_ASSERT(uregex_end(re, 0, &status) == 0);
681        TEST_TEARDOWN;
682
683        /* hitEnd()       */
684        TEST_SETUP("[a-f]*", "abcdefghij", 0);
685        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
686        TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
687        TEST_TEARDOWN;
688
689        TEST_SETUP("[a-f]*", "abcdef", 0);
690        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
691        TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
692        TEST_TEARDOWN;
693
694        /* requireEnd   */
695        TEST_SETUP("abcd", "abcd", 0);
696        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
697        TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
698        TEST_TEARDOWN;
699
700        TEST_SETUP("abcd$", "abcd", 0);
701        TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
702        TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
703        TEST_TEARDOWN;
704
705        /* anchoringBounds        */
706        TEST_SETUP("abc$", "abcdef", 0);
707        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
708        uregex_useAnchoringBounds(re, FALSE, &status);
709        TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
710
711        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
712        uregex_useAnchoringBounds(re, TRUE, &status);
713        uregex_setRegion(re, 0, 3, &status);
714        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
715        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
716        TEST_TEARDOWN;
717
718        /* Transparent Bounds      */
719        TEST_SETUP("abc(?=def)", "abcdef", 0);
720        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
721        uregex_useTransparentBounds(re, TRUE, &status);
722        TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
723
724        uregex_useTransparentBounds(re, FALSE, &status);
725        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
726        uregex_setRegion(re, 0, 3, &status);
727        TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
728        uregex_useTransparentBounds(re, TRUE, &status);
729        TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
730        TEST_ASSERT(uregex_end(re, 0, &status) == 3);
731        TEST_TEARDOWN;
732
733
734    /*
735     *  replaceFirst()
736     */
737    {
738        UChar    text1[80];
739        UChar    text2[80];
740        UChar    replText[80];
741        UChar    buf[80];
742        int32_t  resultSz;
743        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
744        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
745        u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
746
747        status = U_ZERO_ERROR;
748        re = uregex_openC("x(.*?)x", 0, NULL, &status);
749        TEST_ASSERT_SUCCESS(status);
750
751        /*  Normal case, with match */
752        uregex_setText(re, text1, -1, &status);
753        resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
754        TEST_ASSERT_SUCCESS(status);
755        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
756        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
757
758        /* No match.  Text should copy to output with no changes.  */
759        status = U_ZERO_ERROR;
760        uregex_setText(re, text2, -1, &status);
761        resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
762        TEST_ASSERT_SUCCESS(status);
763        TEST_ASSERT_STRING("No match here.", buf, TRUE);
764        TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
765
766        /*  Match, output just fills buffer, no termination warning. */
767        status = U_ZERO_ERROR;
768        uregex_setText(re, text1, -1, &status);
769        memset(buf, -1, sizeof(buf));
770        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
771        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
772        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
773        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
774        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
775
776        /* Do the replaceFirst again, without first resetting anything.
777         *  Should give the same results.
778         */
779        status = U_ZERO_ERROR;
780        memset(buf, -1, sizeof(buf));
781        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
782        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
783        TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
784        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
785        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
786
787        /* NULL buffer, zero buffer length */
788        status = U_ZERO_ERROR;
789        resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
790        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
791        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
792
793        /* Buffer too small by one */
794        status = U_ZERO_ERROR;
795        memset(buf, -1, sizeof(buf));
796        resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
797        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
798        TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
799        TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
800        TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
801
802        uregex_close(re);
803    }
804
805
806    /*
807     *  replaceAll()
808     */
809    {
810        UChar    text1[80];          /*  "Replace xaax x1x x...x." */
811        UChar    text2[80];          /*  "No match Here"           */
812        UChar    replText[80];       /*  "<$1>"                    */
813        UChar    replText2[80];      /*  "<<$1>>"                  */
814        const char * pattern = "x(.*?)x";
815        const char * expectedResult = "Replace <aa> <1> <...>.";
816        const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
817        UChar    buf[80];
818        int32_t  resultSize;
819        int32_t  expectedResultSize;
820        int32_t  expectedResultSize2;
821        int32_t  i;
822
823        u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
824        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
825        u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
826        u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
827        expectedResultSize = strlen(expectedResult);
828        expectedResultSize2 = strlen(expectedResult2);
829
830        status = U_ZERO_ERROR;
831        re = uregex_openC(pattern, 0, NULL, &status);
832        TEST_ASSERT_SUCCESS(status);
833
834        /*  Normal case, with match */
835        uregex_setText(re, text1, -1, &status);
836        resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
837        TEST_ASSERT_SUCCESS(status);
838        TEST_ASSERT_STRING(expectedResult, buf, TRUE);
839        TEST_ASSERT(resultSize == expectedResultSize);
840
841        /* No match.  Text should copy to output with no changes.  */
842        status = U_ZERO_ERROR;
843        uregex_setText(re, text2, -1, &status);
844        resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
845        TEST_ASSERT_SUCCESS(status);
846        TEST_ASSERT_STRING("No match here.", buf, TRUE);
847        TEST_ASSERT(resultSize == u_strlen(text2));
848
849        /*  Match, output just fills buffer, no termination warning. */
850        status = U_ZERO_ERROR;
851        uregex_setText(re, text1, -1, &status);
852        memset(buf, -1, sizeof(buf));
853        resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
854        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
855        TEST_ASSERT_STRING(expectedResult, buf, FALSE);
856        TEST_ASSERT(resultSize == expectedResultSize);
857        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
858
        /* Do the replaceAll again, without first resetting anything.
         *  Should give the same results.
         */
862        status = U_ZERO_ERROR;
863        memset(buf, -1, sizeof(buf));
864        resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
865        TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
866        TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
867        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
868        TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
869
870        /* NULL buffer, zero buffer length */
871        status = U_ZERO_ERROR;
872        resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
873        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
874        TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
875
876        /* Buffer too small.  Try every size, which will tickle edge cases
877         * in uregex_appendReplacement (used by replaceAll)   */
878        for (i=0; i<expectedResultSize; i++) {
879            char  expected[80];
880            status = U_ZERO_ERROR;
881            memset(buf, -1, sizeof(buf));
882            resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
883            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
884            strcpy(expected, expectedResult);
885            expected[i] = 0;
886            TEST_ASSERT_STRING(expected, buf, FALSE);
887            TEST_ASSERT(resultSize == expectedResultSize);
888            TEST_ASSERT(buf[i] == (UChar)0xffff);
889        }
890
891        /* Buffer too small.  Same as previous test, except this time the replacement
892         * text is longer than the match capture group, making the length of the complete
893         * replacement longer than the original string.
894         */
895        for (i=0; i<expectedResultSize2; i++) {
896            char  expected[80];
897            status = U_ZERO_ERROR;
898            memset(buf, -1, sizeof(buf));
899            resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
900            TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
901            strcpy(expected, expectedResult2);
902            expected[i] = 0;
903            TEST_ASSERT_STRING(expected, buf, FALSE);
904            TEST_ASSERT(resultSize == expectedResultSize2);
905            TEST_ASSERT(buf[i] == (UChar)0xffff);
906        }
907
908
909        uregex_close(re);
910    }
911
912
913    /*
914     *  appendReplacement()
915     */
916    {
917        UChar    text[100];
918        UChar    repl[100];
919        UChar    buf[100];
920        UChar   *bufPtr;
921        int32_t  bufCap;
922
923
924        status = U_ZERO_ERROR;
925        re = uregex_openC(".*", 0, 0, &status);
926        TEST_ASSERT_SUCCESS(status);
927
928        u_uastrncpy(text, "whatever",  sizeof(text)/2);
929        u_uastrncpy(repl, "some other", sizeof(repl)/2);
930        uregex_setText(re, text, -1, &status);
931
932        /* match covers whole target string */
933        uregex_find(re, 0, &status);
934        TEST_ASSERT_SUCCESS(status);
935        bufPtr = buf;
936        bufCap = sizeof(buf) / 2;
937        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
938        TEST_ASSERT_SUCCESS(status);
939        TEST_ASSERT_STRING("some other", buf, TRUE);
940
941        /* Match has \u \U escapes */
942        uregex_find(re, 0, &status);
943        TEST_ASSERT_SUCCESS(status);
944        bufPtr = buf;
945        bufCap = sizeof(buf) / 2;
946        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
947        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
948        TEST_ASSERT_SUCCESS(status);
949        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
950
951        /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
952        status = U_ZERO_ERROR;
953        uregex_find(re, 0, &status);
954        TEST_ASSERT_SUCCESS(status);
955        bufPtr = buf;
956        status = U_BUFFER_OVERFLOW_ERROR;
957        uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
958        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
959
960        uregex_close(re);
961    }
962
963
964    /*
965     *  appendTail().   Checked in ReplaceFirst(), replaceAll().
966     */
967
968    /*
969     *  split()
970     */
971    {
972        UChar    textToSplit[80];
973        UChar    text2[80];
974        UChar    buf[200];
975        UChar    *fields[10];
976        int32_t  numFields;
977        int32_t  requiredCapacity;
978        int32_t  spaceNeeded;
979        int32_t  sz;
980
981        u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
982        u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
983
984        status = U_ZERO_ERROR;
985        re = uregex_openC(":", 0, NULL, &status);
986
987
988        /*  Simple split */
989
990        uregex_setText(re, textToSplit, -1, &status);
991        TEST_ASSERT_SUCCESS(status);
992
993        /* The TEST_ASSERT_SUCCESS call above should change too... */
994        if (U_SUCCESS(status)) {
995            memset(fields, -1, sizeof(fields));
996            numFields =
997                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
998            TEST_ASSERT_SUCCESS(status);
999
1000            /* The TEST_ASSERT_SUCCESS call above should change too... */
1001            if(U_SUCCESS(status)) {
1002                TEST_ASSERT(numFields == 3);
1003                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1004                TEST_ASSERT_STRING(" second", fields[1], TRUE);
1005                TEST_ASSERT_STRING("  third", fields[2], TRUE);
1006                TEST_ASSERT(fields[3] == NULL);
1007
1008                spaceNeeded = u_strlen(textToSplit) -
1009                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1010                            numFields;          /* Each field gets a NUL terminator */
1011
1012                TEST_ASSERT(spaceNeeded == requiredCapacity);
1013            }
1014        }
1015
1016        uregex_close(re);
1017
1018
1019        /*  Split with too few output strings available */
1020        status = U_ZERO_ERROR;
1021        re = uregex_openC(":", 0, NULL, &status);
1022        uregex_setText(re, textToSplit, -1, &status);
1023        TEST_ASSERT_SUCCESS(status);
1024
1025        /* The TEST_ASSERT_SUCCESS call above should change too... */
1026        if(U_SUCCESS(status)) {
1027            memset(fields, -1, sizeof(fields));
1028            numFields =
1029                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1030            TEST_ASSERT_SUCCESS(status);
1031
1032            /* The TEST_ASSERT_SUCCESS call above should change too... */
1033            if(U_SUCCESS(status)) {
1034                TEST_ASSERT(numFields == 2);
1035                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1036                TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1037                TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1038
1039                spaceNeeded = u_strlen(textToSplit) -
1040                            (numFields - 1)  +  /* Field delimiters do not appear in output */
1041                            numFields;          /* Each field gets a NUL terminator */
1042
1043                TEST_ASSERT(spaceNeeded == requiredCapacity);
1044
1045                /* Split with a range of output buffer sizes.  */
1046                spaceNeeded = u_strlen(textToSplit) -
1047                    (numFields - 1)  +  /* Field delimiters do not appear in output */
1048                    numFields;          /* Each field gets a NUL terminator */
1049
1050                for (sz=0; sz < spaceNeeded+1; sz++) {
1051                    memset(fields, -1, sizeof(fields));
1052                    status = U_ZERO_ERROR;
1053                    numFields =
1054                        uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1055                    if (sz >= spaceNeeded) {
1056                        TEST_ASSERT_SUCCESS(status);
1057                        TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1058                        TEST_ASSERT_STRING(" second", fields[1], TRUE);
1059                        TEST_ASSERT_STRING("  third", fields[2], TRUE);
1060                    } else {
1061                        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1062                    }
1063                    TEST_ASSERT(numFields == 3);
1064                    TEST_ASSERT(fields[3] == NULL);
1065                    TEST_ASSERT(spaceNeeded == requiredCapacity);
1066                }
1067            }
1068        }
1069
1070        uregex_close(re);
1071    }
1072
1073
1074
1075
1076    /* Split(), part 2.  Patterns with capture groups.  The capture group text
1077     *                   comes out as additional fields.  */
1078    {
1079        UChar    textToSplit[80];
1080        UChar    buf[200];
1081        UChar    *fields[10];
1082        int32_t  numFields;
1083        int32_t  requiredCapacity;
1084        int32_t  spaceNeeded;
1085        int32_t  sz;
1086
1087        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
1088
1089        status = U_ZERO_ERROR;
1090        re = uregex_openC("<(.*?)>", 0, NULL, &status);
1091
1092        uregex_setText(re, textToSplit, -1, &status);
1093        TEST_ASSERT_SUCCESS(status);
1094
1095        /* The TEST_ASSERT_SUCCESS call above should change too... */
1096        if(U_SUCCESS(status)) {
1097            memset(fields, -1, sizeof(fields));
1098            numFields =
1099                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1100            TEST_ASSERT_SUCCESS(status);
1101
1102            /* The TEST_ASSERT_SUCCESS call above should change too... */
1103            if(U_SUCCESS(status)) {
1104                TEST_ASSERT(numFields == 5);
1105                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1106                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1107                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1108                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1109                TEST_ASSERT_STRING("  third", fields[4], TRUE);
1110                TEST_ASSERT(fields[5] == NULL);
1111                spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1112                TEST_ASSERT(spaceNeeded == requiredCapacity);
1113            }
1114        }
1115
1116        /*  Split with too few output strings available (2) */
1117        status = U_ZERO_ERROR;
1118        memset(fields, -1, sizeof(fields));
1119        numFields =
1120            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1121        TEST_ASSERT_SUCCESS(status);
1122
1123        /* The TEST_ASSERT_SUCCESS call above should change too... */
1124        if(U_SUCCESS(status)) {
1125            TEST_ASSERT(numFields == 2);
1126            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1127            TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1128            TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1129
1130            spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1131            TEST_ASSERT(spaceNeeded == requiredCapacity);
1132        }
1133
1134        /*  Split with too few output strings available (3) */
1135        status = U_ZERO_ERROR;
1136        memset(fields, -1, sizeof(fields));
1137        numFields =
1138            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
1139        TEST_ASSERT_SUCCESS(status);
1140
1141        /* The TEST_ASSERT_SUCCESS call above should change too... */
1142        if(U_SUCCESS(status)) {
1143            TEST_ASSERT(numFields == 3);
1144            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1145            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1146            TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1147            TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1148
1149            spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1150            TEST_ASSERT(spaceNeeded == requiredCapacity);
1151        }
1152
1153        /*  Split with just enough output strings available (5) */
1154        status = U_ZERO_ERROR;
1155        memset(fields, -1, sizeof(fields));
1156        numFields =
1157            uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
1158        TEST_ASSERT_SUCCESS(status);
1159
1160        /* The TEST_ASSERT_SUCCESS call above should change too... */
1161        if(U_SUCCESS(status)) {
1162            TEST_ASSERT(numFields == 5);
1163            TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1164            TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1165            TEST_ASSERT_STRING(" second", fields[2], TRUE);
1166            TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1167            TEST_ASSERT_STRING("  third", fields[4], TRUE);
1168            TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1169
1170            spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1171            TEST_ASSERT(spaceNeeded == requiredCapacity);
1172        }
1173
1174        /* Split, end of text is a field delimiter.   */
1175        status = U_ZERO_ERROR;
1176        sz = strlen("first <tag-a> second<tag-b>");
1177        uregex_setText(re, textToSplit, sz, &status);
1178        TEST_ASSERT_SUCCESS(status);
1179
1180        /* The TEST_ASSERT_SUCCESS call above should change too... */
1181        if(U_SUCCESS(status)) {
1182            memset(fields, -1, sizeof(fields));
1183            numFields =
1184                uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
1185            TEST_ASSERT_SUCCESS(status);
1186
1187            /* The TEST_ASSERT_SUCCESS call above should change too... */
1188            if(U_SUCCESS(status)) {
1189                TEST_ASSERT(numFields == 4);
1190                TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1191                TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1192                TEST_ASSERT_STRING(" second", fields[2], TRUE);
1193                TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1194                TEST_ASSERT(fields[4] == NULL);
1195                TEST_ASSERT(fields[8] == NULL);
1196                TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1197                spaceNeeded = strlen("first .tag-a. second.tag-b.");  /* "." at NUL positions */
1198                TEST_ASSERT(spaceNeeded == requiredCapacity);
1199            }
1200        }
1201
1202        uregex_close(re);
1203    }
1204
1205    /*
1206     * set/getTimeLimit
1207     */
1208     TEST_SETUP("abc$", "abcdef", 0);
1209     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1210     uregex_setTimeLimit(re, 1000, &status);
1211     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1212     TEST_ASSERT_SUCCESS(status);
1213     uregex_setTimeLimit(re, -1, &status);
1214     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1215     status = U_ZERO_ERROR;
1216     TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1217     TEST_TEARDOWN;
1218
1219     /*
1220      * set/get Stack Limit
1221      */
1222     TEST_SETUP("abc$", "abcdef", 0);
1223     TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1224     uregex_setStackLimit(re, 40000, &status);
1225     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1226     TEST_ASSERT_SUCCESS(status);
1227     uregex_setStackLimit(re, -1, &status);
1228     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1229     status = U_ZERO_ERROR;
1230     TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1231     TEST_TEARDOWN;
1232
1233
1234     /*
1235      * Get/Set callback functions
1236      *     This test is copied from intltest regex/Callbacks
1237      *     The pattern and test data will run long enough to cause the callback
1238      *       to be invoked.  The nested '+' operators give exponential time
1239      *       behavior with increasing string length.
1240      */
1241     TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1242     callBackContext cbInfo = {4, 0, 0};
1243     const void     *pContext   = &cbInfo;
1244     URegexMatchCallback    *returnedFn = &TestCallbackFn;
1245
1246     /*  Getting the callback fn when it hasn't been set must return NULL  */
1247     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1248     TEST_ASSERT_SUCCESS(status);
1249     TEST_ASSERT(returnedFn == NULL);
1250     TEST_ASSERT(pContext == NULL);
1251
     /* Set the callback and do a match.                                  */
1253     /* The callback function should record that it has been called.      */
1254     uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1255     TEST_ASSERT_SUCCESS(status);
1256     TEST_ASSERT(cbInfo.numCalls == 0);
1257     TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1258     TEST_ASSERT_SUCCESS(status);
1259     TEST_ASSERT(cbInfo.numCalls > 0);
1260
1261     /* Getting the callback should return the values that were set above.  */
1262     uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1263     TEST_ASSERT(returnedFn == &TestCallbackFn);
1264     TEST_ASSERT(pContext == &cbInfo);
1265
1266     TEST_TEARDOWN;
1267}
1268
1269
1270
1271static void TestBug4315(void) {
1272    UErrorCode      theICUError = U_ZERO_ERROR;
1273    URegularExpression *theRegEx;
1274    UChar           *textBuff;
1275    const char      *thePattern;
1276    UChar            theString[100];
1277    UChar           *destFields[24];
1278    int32_t         neededLength1;
1279    int32_t         neededLength2;
1280
1281    int32_t         wordCount = 0;
1282    int32_t         destFieldsSize = 24;
1283
1284    thePattern  = "ck ";
1285    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1286
1287    /* open a regex */
1288    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1289    TEST_ASSERT_SUCCESS(theICUError);
1290
1291    /* set the input string */
1292    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1293    TEST_ASSERT_SUCCESS(theICUError);
1294
1295    /* split */
1296    /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1297     *  error occurs! */
1298    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1299        destFieldsSize, &theICUError);
1300
1301    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1302    TEST_ASSERT(wordCount==3);
1303
1304    if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1305    {
1306        theICUError = U_ZERO_ERROR;
1307        textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1308        wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1309            destFields, destFieldsSize, &theICUError);
1310        TEST_ASSERT(wordCount==3);
1311        TEST_ASSERT_SUCCESS(theICUError);
1312        TEST_ASSERT(neededLength1 == neededLength2);
1313        TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1314        TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1315        TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1316        TEST_ASSERT(destFields[3] == NULL);
1317        free(textBuff);
1318    }
1319    uregex_close(theRegEx);
1320}
1321
1322#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
1323