1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2004-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/******************************************************************************** 7* 8* File reapits.c 9* 10*********************************************************************************/ 11/*C API TEST FOR Regular Expressions */ 12/** 13* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't 14* try to test the full functionality. It just calls each function and verifies that it 15* works on a basic level. 16* 17* More complete testing of regular expression functionality is done with the C++ tests. 18**/ 19 20#include "unicode/utypes.h" 21 22#if !UCONFIG_NO_REGULAR_EXPRESSIONS 23 24#include <stdlib.h> 25#include <string.h> 26#include "unicode/uloc.h" 27#include "unicode/uregex.h" 28#include "unicode/ustring.h" 29#include "cintltst.h" 30 31#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 32log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} 33 34#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 35log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} 36 37/* 38 * TEST_SETUP and TEST_TEARDOWN 39 * macros to handle the boilerplate around setting up regex test cases. 40 * parameters to setup: 41 * pattern: The regex pattern, a (char *) null terminated C string. 42 * testString: The string data, also a (char *) C string. 43 * flags: Regex flags to set when compiling the pattern 44 * 45 * Put arbitrary test code between SETUP and TEARDOWN. 46 * "re" is the compiled, ready-to-go regular expression. 
47 */ 48#define TEST_SETUP(pattern, testString, flags) { \ 49 UChar *srcString = NULL; \ 50 status = U_ZERO_ERROR; \ 51 re = uregex_openC(pattern, flags, NULL, &status); \ 52 TEST_ASSERT_SUCCESS(status); \ 53 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \ 54 u_uastrncpy(srcString, testString, strlen(testString)+1); \ 55 uregex_setText(re, srcString, -1, &status); \ 56 TEST_ASSERT_SUCCESS(status); \ 57 if (U_SUCCESS(status)) { 58 59#define TEST_TEARDOWN \ 60 } \ 61 TEST_ASSERT_SUCCESS(status); \ 62 uregex_close(re); \ 63 free(srcString); \ 64 } 65 66 67static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 68 char buf_inside_macro[120]; 69 int32_t len = (int32_t)strlen(expected); 70 UBool success; 71 if (nulTerm) { 72 u_austrncpy(buf_inside_macro, (actual), len+1); 73 buf_inside_macro[len+2] = 0; 74 success = (strcmp((expected), buf_inside_macro) == 0); 75 } else { 76 u_austrncpy(buf_inside_macro, (actual), len); 77 buf_inside_macro[len+1] = 0; 78 success = (strncmp((expected), buf_inside_macro, len) == 0); 79 } 80 if (success == FALSE) { 81 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 82 file, line, (expected), buf_inside_macro); 83 } 84} 85 86#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 87 88 89 90 91 92static void TestRegexCAPI(void); 93static void TestBug4315(void); 94 95void addURegexTest(TestNode** root); 96 97void addURegexTest(TestNode** root) 98{ 99 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); 100 addTest(root, &TestBug4315, "regex/TestBug4315"); 101} 102 103/* 104 * Call back function and context struct used for testing 105 * regular expression user callbacks. This test is mostly the same as 106 * the corresponding C++ test in intltest. 
*/
typedef struct callBackContext {
    int32_t          maxCalls;    /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t          numCalls;    /* Number of times TestCallbackFn has been invoked              */
    int32_t          lastSteps;   /* The "steps" value received by the previous invocation        */
} callBackContext;

/*
 * Callback used by the tests.  Verifies that "steps" is exactly one more
 * than on the previous invocation (logging an error otherwise), records the
 * new value and counts the call.
 *
 *   context:  Points to the callBackContext to update; it is written through
 *             even though the parameter is declared const.
 *   steps:    The step count passed in by the caller.
 *   returns:  TRUE while the updated numCalls is still below maxCalls,
 *             FALSE otherwise.
 *
 * NOTE(review): presumably a FALSE return asks the regex engine to give up
 * on the match in progress - confirm against the uregex callback docs.
 */
static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void *context, int32_t steps) {
  callBackContext  *info = (callBackContext *)context;
  if (info->lastSteps+1 != steps) {
      log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
  }
  info->lastSteps = steps;
  info->numCalls++;
  return (info->numCalls < info->maxCalls);
}

/*
 *   Regular Expression C API Tests
 */
static void TestRegexCAPI(void) {
    UErrorCode           status = U_ZERO_ERROR;
    URegularExpression  *re;
    UChar                pat[200];
    UChar               *minus1;

    /* A pointer with every byte set to 0xff.  The split() tests fill their
     * output array the same way and compare against this value to verify
     * that unused slots were left untouched. */
    memset(&minus1, -1, sizeof(minus1));

    /* Minimalist open/close */
    u_uastrncpy(pat, "abc*", sizeof(pat)/2);
    re = uregex_open(pat, -1, 0, 0, &status);
    if (U_FAILURE(status)) {
         log_data_err("Failed to open regular expression, line %d, error is \"%s\" (Are you missing data?)\n", __LINE__, u_errorName(status));
         return;
    }
    uregex_close(re);

    /* Open with all flag values set */
    status = U_ZERO_ERROR;
    re = uregex_open(pat, -1,
        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
        0, &status);
    TEST_ASSERT_SUCCESS(status);
    uregex_close(re);

    /* Open with an invalid flag */
    status = U_ZERO_ERROR;
    re = uregex_open(pat, -1, 0x40000000, 0, &status);
    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
    uregex_close(re);

    /* openC with an invalid parameter (NULL pattern) */
    status = U_ZERO_ERROR;
    re = uregex_openC(NULL,
        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);

    /* openC with an invalid parameter and an incoming failure status;
     * the incoming status is expected to be preserved. */
    status = U_USELESS_COLLATOR_ERROR;
    re = uregex_openC(NULL,
        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS
| UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 169 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); 170 171 /* openC open from a C string */ 172 { 173 const UChar *p; 174 int32_t len; 175 status = U_ZERO_ERROR; 176 re = uregex_openC("abc*", 0, 0, &status); 177 TEST_ASSERT_SUCCESS(status); 178 p = uregex_pattern(re, &len, &status); 179 TEST_ASSERT_SUCCESS(status); 180 181 /* The TEST_ASSERT_SUCCESS above should change too... */ 182 if(U_SUCCESS(status)) { 183 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 184 TEST_ASSERT(u_strcmp(pat, p) == 0); 185 TEST_ASSERT(len==(int32_t)strlen("abc*")); 186 } 187 188 uregex_close(re); 189 190 /* TODO: Open with ParseError parameter */ 191 } 192 193 /* 194 * clone 195 */ 196 { 197 URegularExpression *clone1; 198 URegularExpression *clone2; 199 URegularExpression *clone3; 200 UChar testString1[30]; 201 UChar testString2[30]; 202 UBool result; 203 204 205 status = U_ZERO_ERROR; 206 re = uregex_openC("abc*", 0, 0, &status); 207 TEST_ASSERT_SUCCESS(status); 208 clone1 = uregex_clone(re, &status); 209 TEST_ASSERT_SUCCESS(status); 210 TEST_ASSERT(clone1 != NULL); 211 212 status = U_ZERO_ERROR; 213 clone2 = uregex_clone(re, &status); 214 TEST_ASSERT_SUCCESS(status); 215 TEST_ASSERT(clone2 != NULL); 216 uregex_close(re); 217 218 status = U_ZERO_ERROR; 219 clone3 = uregex_clone(clone2, &status); 220 TEST_ASSERT_SUCCESS(status); 221 TEST_ASSERT(clone3 != NULL); 222 223 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 224 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 225 226 status = U_ZERO_ERROR; 227 uregex_setText(clone1, testString1, -1, &status); 228 TEST_ASSERT_SUCCESS(status); 229 result = uregex_lookingAt(clone1, 0, &status); 230 TEST_ASSERT_SUCCESS(status); 231 TEST_ASSERT(result==TRUE); 232 233 status = U_ZERO_ERROR; 234 uregex_setText(clone2, testString2, -1, &status); 235 TEST_ASSERT_SUCCESS(status); 236 result = uregex_lookingAt(clone2, 0, &status); 237 TEST_ASSERT_SUCCESS(status); 238 
TEST_ASSERT(result==FALSE); 239 result = uregex_find(clone2, 0, &status); 240 TEST_ASSERT_SUCCESS(status); 241 TEST_ASSERT(result==TRUE); 242 243 uregex_close(clone1); 244 uregex_close(clone2); 245 uregex_close(clone3); 246 247 } 248 249 /* 250 * pattern() 251 */ 252 { 253 const UChar *resultPat; 254 int32_t resultLen; 255 u_uastrncpy(pat, "hello", sizeof(pat)/2); 256 status = U_ZERO_ERROR; 257 re = uregex_open(pat, -1, 0, NULL, &status); 258 resultPat = uregex_pattern(re, &resultLen, &status); 259 TEST_ASSERT_SUCCESS(status); 260 261 /* The TEST_ASSERT_SUCCESS above should change too... */ 262 if (U_SUCCESS(status)) { 263 TEST_ASSERT(resultLen == -1); 264 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 265 } 266 267 uregex_close(re); 268 269 status = U_ZERO_ERROR; 270 re = uregex_open(pat, 3, 0, NULL, &status); 271 resultPat = uregex_pattern(re, &resultLen, &status); 272 TEST_ASSERT_SUCCESS(status); 273 TEST_ASSERT_SUCCESS(status); 274 275 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 276 if (U_SUCCESS(status)) { 277 TEST_ASSERT(resultLen == 3); 278 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 279 TEST_ASSERT(u_strlen(resultPat) == 3); 280 } 281 282 uregex_close(re); 283 } 284 285 /* 286 * flags() 287 */ 288 { 289 int32_t t; 290 291 status = U_ZERO_ERROR; 292 re = uregex_open(pat, -1, 0, NULL, &status); 293 t = uregex_flags(re, &status); 294 TEST_ASSERT_SUCCESS(status); 295 TEST_ASSERT(t == 0); 296 uregex_close(re); 297 298 status = U_ZERO_ERROR; 299 re = uregex_open(pat, -1, 0, NULL, &status); 300 t = uregex_flags(re, &status); 301 TEST_ASSERT_SUCCESS(status); 302 TEST_ASSERT(t == 0); 303 uregex_close(re); 304 305 status = U_ZERO_ERROR; 306 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); 307 t = uregex_flags(re, &status); 308 TEST_ASSERT_SUCCESS(status); 309 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); 310 uregex_close(re); 311 } 312 313 /* 314 * setText() and lookingAt() 315 */ 316 { 317 UChar text1[50]; 318 UChar text2[50]; 319 UBool result; 320 321 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 322 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); 323 status = U_ZERO_ERROR; 324 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 325 re = uregex_open(pat, -1, 0, NULL, &status); 326 TEST_ASSERT_SUCCESS(status); 327 328 /* Operation before doing a setText should fail... 
*/ 329 status = U_ZERO_ERROR; 330 uregex_lookingAt(re, 0, &status); 331 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 332 333 status = U_ZERO_ERROR; 334 uregex_setText(re, text1, -1, &status); 335 result = uregex_lookingAt(re, 0, &status); 336 TEST_ASSERT(result == TRUE); 337 TEST_ASSERT_SUCCESS(status); 338 339 status = U_ZERO_ERROR; 340 uregex_setText(re, text2, -1, &status); 341 result = uregex_lookingAt(re, 0, &status); 342 TEST_ASSERT(result == FALSE); 343 TEST_ASSERT_SUCCESS(status); 344 345 status = U_ZERO_ERROR; 346 uregex_setText(re, text1, -1, &status); 347 result = uregex_lookingAt(re, 0, &status); 348 TEST_ASSERT(result == TRUE); 349 TEST_ASSERT_SUCCESS(status); 350 351 status = U_ZERO_ERROR; 352 uregex_setText(re, text1, 5, &status); 353 result = uregex_lookingAt(re, 0, &status); 354 TEST_ASSERT(result == FALSE); 355 TEST_ASSERT_SUCCESS(status); 356 357 status = U_ZERO_ERROR; 358 uregex_setText(re, text1, 6, &status); 359 result = uregex_lookingAt(re, 0, &status); 360 TEST_ASSERT(result == TRUE); 361 TEST_ASSERT_SUCCESS(status); 362 363 uregex_close(re); 364 } 365 366 367 /* 368 * getText() 369 */ 370 { 371 UChar text1[50]; 372 UChar text2[50]; 373 const UChar *result; 374 int32_t textLength; 375 376 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 377 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); 378 status = U_ZERO_ERROR; 379 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 380 re = uregex_open(pat, -1, 0, NULL, &status); 381 382 uregex_setText(re, text1, -1, &status); 383 result = uregex_getText(re, &textLength, &status); 384 TEST_ASSERT(result == text1); 385 TEST_ASSERT(textLength == -1); 386 TEST_ASSERT_SUCCESS(status); 387 388 status = U_ZERO_ERROR; 389 uregex_setText(re, text2, 7, &status); 390 result = uregex_getText(re, &textLength, &status); 391 TEST_ASSERT(result == text2); 392 TEST_ASSERT(textLength == 7); 393 TEST_ASSERT_SUCCESS(status); 394 395 status = U_ZERO_ERROR; 396 uregex_setText(re, text2, 4, &status); 397 result = uregex_getText(re, 
&textLength, &status); 398 TEST_ASSERT(result == text2); 399 TEST_ASSERT(textLength == 4); 400 TEST_ASSERT_SUCCESS(status); 401 uregex_close(re); 402 } 403 404 /* 405 * matches() 406 */ 407 { 408 UChar text1[50]; 409 UBool result; 410 int len; 411 UChar nullString[] = {0,0,0}; 412 413 u_uastrncpy(text1, "abcccde", sizeof(text1)/2); 414 status = U_ZERO_ERROR; 415 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 416 re = uregex_open(pat, -1, 0, NULL, &status); 417 418 uregex_setText(re, text1, -1, &status); 419 result = uregex_matches(re, 0, &status); 420 TEST_ASSERT(result == FALSE); 421 TEST_ASSERT_SUCCESS(status); 422 423 status = U_ZERO_ERROR; 424 uregex_setText(re, text1, 6, &status); 425 result = uregex_matches(re, 0, &status); 426 TEST_ASSERT(result == TRUE); 427 TEST_ASSERT_SUCCESS(status); 428 429 status = U_ZERO_ERROR; 430 uregex_setText(re, text1, 6, &status); 431 result = uregex_matches(re, 1, &status); 432 TEST_ASSERT(result == FALSE); 433 TEST_ASSERT_SUCCESS(status); 434 uregex_close(re); 435 436 status = U_ZERO_ERROR; 437 re = uregex_openC(".?", 0, NULL, &status); 438 uregex_setText(re, text1, -1, &status); 439 len = u_strlen(text1); 440 result = uregex_matches(re, len, &status); 441 TEST_ASSERT(result == TRUE); 442 TEST_ASSERT_SUCCESS(status); 443 444 status = U_ZERO_ERROR; 445 uregex_setText(re, nullString, -1, &status); 446 TEST_ASSERT_SUCCESS(status); 447 result = uregex_matches(re, 0, &status); 448 TEST_ASSERT(result == TRUE); 449 TEST_ASSERT_SUCCESS(status); 450 uregex_close(re); 451 } 452 453 454 /* 455 * lookingAt() Used in setText test. 
456 */ 457 458 459 /* 460 * find(), findNext, start, end, reset 461 */ 462 { 463 UChar text1[50]; 464 UBool result; 465 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 466 status = U_ZERO_ERROR; 467 re = uregex_openC("rx", 0, NULL, &status); 468 469 uregex_setText(re, text1, -1, &status); 470 result = uregex_find(re, 0, &status); 471 TEST_ASSERT(result == TRUE); 472 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 473 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 474 TEST_ASSERT_SUCCESS(status); 475 476 result = uregex_find(re, 9, &status); 477 TEST_ASSERT(result == TRUE); 478 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 479 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 480 TEST_ASSERT_SUCCESS(status); 481 482 result = uregex_find(re, 14, &status); 483 TEST_ASSERT(result == FALSE); 484 TEST_ASSERT_SUCCESS(status); 485 486 status = U_ZERO_ERROR; 487 uregex_reset(re, 0, &status); 488 489 result = uregex_findNext(re, &status); 490 TEST_ASSERT(result == TRUE); 491 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 492 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 493 TEST_ASSERT_SUCCESS(status); 494 495 result = uregex_findNext(re, &status); 496 TEST_ASSERT(result == TRUE); 497 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 498 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 499 TEST_ASSERT_SUCCESS(status); 500 501 status = U_ZERO_ERROR; 502 uregex_reset(re, 12, &status); 503 504 result = uregex_findNext(re, &status); 505 TEST_ASSERT(result == TRUE); 506 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 507 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 508 TEST_ASSERT_SUCCESS(status); 509 510 result = uregex_findNext(re, &status); 511 TEST_ASSERT(result == FALSE); 512 TEST_ASSERT_SUCCESS(status); 513 514 uregex_close(re); 515 } 516 517 /* 518 * groupCount 519 */ 520 { 521 int32_t result; 522 523 status = U_ZERO_ERROR; 524 re = uregex_openC("abc", 0, NULL, &status); 525 result = uregex_groupCount(re, &status); 526 TEST_ASSERT_SUCCESS(status); 527 
TEST_ASSERT(result == 0); 528 uregex_close(re); 529 530 status = U_ZERO_ERROR; 531 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); 532 result = uregex_groupCount(re, &status); 533 TEST_ASSERT_SUCCESS(status); 534 TEST_ASSERT(result == 3); 535 uregex_close(re); 536 537 } 538 539 540 /* 541 * group() 542 */ 543 { 544 UChar text1[80]; 545 UChar buf[80]; 546 UBool result; 547 int32_t resultSz; 548 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 549 550 status = U_ZERO_ERROR; 551 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 552 TEST_ASSERT_SUCCESS(status); 553 554 555 uregex_setText(re, text1, -1, &status); 556 result = uregex_find(re, 0, &status); 557 TEST_ASSERT(result==TRUE); 558 559 /* Capture Group 0, the full match. Should succeed. */ 560 status = U_ZERO_ERROR; 561 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); 562 TEST_ASSERT_SUCCESS(status); 563 TEST_ASSERT_STRING("abc interior def", buf, TRUE); 564 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 565 566 /* Capture group #1. Should succeed. */ 567 status = U_ZERO_ERROR; 568 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); 569 TEST_ASSERT_SUCCESS(status); 570 TEST_ASSERT_STRING(" interior ", buf, TRUE); 571 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); 572 573 /* Capture group out of range. Error. 
*/ 574 status = U_ZERO_ERROR; 575 uregex_group(re, 2, buf, sizeof(buf)/2, &status); 576 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 577 578 /* NULL buffer, pure pre-flight */ 579 status = U_ZERO_ERROR; 580 resultSz = uregex_group(re, 0, NULL, 0, &status); 581 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 582 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 583 584 /* Too small buffer, truncated string */ 585 status = U_ZERO_ERROR; 586 memset(buf, -1, sizeof(buf)); 587 resultSz = uregex_group(re, 0, buf, 5, &status); 588 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 589 TEST_ASSERT_STRING("abc i", buf, FALSE); 590 TEST_ASSERT(buf[5] == (UChar)0xffff); 591 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 592 593 /* Output string just fits buffer, no NUL term. */ 594 status = U_ZERO_ERROR; 595 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); 596 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 597 TEST_ASSERT_STRING("abc interior def", buf, FALSE); 598 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 599 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); 600 601 uregex_close(re); 602 603 } 604 605 /* 606 * Regions 607 */ 608 609 610 /* SetRegion(), getRegion() do something */ 611 TEST_SETUP(".*", "0123456789ABCDEF", 0) 612 UChar resultString[40]; 613 TEST_ASSERT(uregex_regionStart(re, &status) == 0); 614 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); 615 uregex_setRegion(re, 3, 6, &status); 616 TEST_ASSERT(uregex_regionStart(re, &status) == 3); 617 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); 618 TEST_ASSERT(uregex_findNext(re, &status)); 619 TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3) 620 TEST_ASSERT_STRING("345", resultString, TRUE); 621 TEST_TEARDOWN; 622 623 /* find(start=-1) uses regions */ 624 TEST_SETUP(".*", "0123456789ABCDEF", 0); 625 uregex_setRegion(re, 4, 6, &status); 626 TEST_ASSERT(uregex_find(re, -1, 
&status) == TRUE); 627 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 628 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 629 TEST_TEARDOWN; 630 631 /* find (start >=0) does not use regions */ 632 TEST_SETUP(".*", "0123456789ABCDEF", 0); 633 uregex_setRegion(re, 4, 6, &status); 634 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 635 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 636 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 637 TEST_TEARDOWN; 638 639 /* findNext() obeys regions */ 640 TEST_SETUP(".", "0123456789ABCDEF", 0); 641 uregex_setRegion(re, 4, 6, &status); 642 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); 643 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 644 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); 645 TEST_ASSERT(uregex_start(re, 0, &status) == 5); 646 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); 647 TEST_TEARDOWN; 648 649 /* matches(start=-1) uses regions */ 650 /* Also, verify that non-greedy *? succeeds in finding the full match. */ 651 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 652 uregex_setRegion(re, 4, 6, &status); 653 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); 654 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 655 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 656 TEST_TEARDOWN; 657 658 /* matches (start >=0) does not use regions */ 659 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 660 uregex_setRegion(re, 4, 6, &status); 661 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); 662 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 663 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 664 TEST_TEARDOWN; 665 666 /* lookingAt(start=-1) uses regions */ 667 /* Also, verify that non-greedy *? finds the first (shortest) match. 
*/ 668 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 669 uregex_setRegion(re, 4, 6, &status); 670 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); 671 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 672 TEST_ASSERT(uregex_end(re, 0, &status) == 4); 673 TEST_TEARDOWN; 674 675 /* lookingAt (start >=0) does not use regions */ 676 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 677 uregex_setRegion(re, 4, 6, &status); 678 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); 679 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 680 TEST_ASSERT(uregex_end(re, 0, &status) == 0); 681 TEST_TEARDOWN; 682 683 /* hitEnd() */ 684 TEST_SETUP("[a-f]*", "abcdefghij", 0); 685 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 686 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); 687 TEST_TEARDOWN; 688 689 TEST_SETUP("[a-f]*", "abcdef", 0); 690 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 691 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); 692 TEST_TEARDOWN; 693 694 /* requireEnd */ 695 TEST_SETUP("abcd", "abcd", 0); 696 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 697 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); 698 TEST_TEARDOWN; 699 700 TEST_SETUP("abcd$", "abcd", 0); 701 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 702 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); 703 TEST_TEARDOWN; 704 705 /* anchoringBounds */ 706 TEST_SETUP("abc$", "abcdef", 0); 707 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); 708 uregex_useAnchoringBounds(re, FALSE, &status); 709 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); 710 711 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); 712 uregex_useAnchoringBounds(re, TRUE, &status); 713 uregex_setRegion(re, 0, 3, &status); 714 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 715 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 716 TEST_TEARDOWN; 717 718 /* Transparent Bounds */ 719 TEST_SETUP("abc(?=def)", "abcdef", 0); 720 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); 721 
uregex_useTransparentBounds(re, TRUE, &status); 722 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); 723 724 uregex_useTransparentBounds(re, FALSE, &status); 725 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ 726 uregex_setRegion(re, 0, 3, &status); 727 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */ 728 uregex_useTransparentBounds(re, TRUE, &status); 729 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */ 730 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 731 TEST_TEARDOWN; 732 733 734 /* 735 * replaceFirst() 736 */ 737 { 738 UChar text1[80]; 739 UChar text2[80]; 740 UChar replText[80]; 741 UChar buf[80]; 742 int32_t resultSz; 743 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 744 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 745 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 746 747 status = U_ZERO_ERROR; 748 re = uregex_openC("x(.*?)x", 0, NULL, &status); 749 TEST_ASSERT_SUCCESS(status); 750 751 /* Normal case, with match */ 752 uregex_setText(re, text1, -1, &status); 753 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 754 TEST_ASSERT_SUCCESS(status); 755 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); 756 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 757 758 /* No match. Text should copy to output with no changes. */ 759 status = U_ZERO_ERROR; 760 uregex_setText(re, text2, -1, &status); 761 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 762 TEST_ASSERT_SUCCESS(status); 763 TEST_ASSERT_STRING("No match here.", buf, TRUE); 764 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); 765 766 /* Match, output just fills buffer, no termination warning. 
*/ 767 status = U_ZERO_ERROR; 768 uregex_setText(re, text1, -1, &status); 769 memset(buf, -1, sizeof(buf)); 770 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 771 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 772 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 773 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 774 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 775 776 /* Do the replaceFirst again, without first resetting anything. 777 * Should give the same results. 778 */ 779 status = U_ZERO_ERROR; 780 memset(buf, -1, sizeof(buf)); 781 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 782 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 783 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 784 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 785 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 786 787 /* NULL buffer, zero buffer length */ 788 status = U_ZERO_ERROR; 789 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); 790 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 791 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 792 793 /* Buffer too small by one */ 794 status = U_ZERO_ERROR; 795 memset(buf, -1, sizeof(buf)); 796 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); 797 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 798 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); 799 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 800 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 801 802 uregex_close(re); 803 } 804 805 806 /* 807 * replaceAll() 808 */ 809 { 810 UChar text1[80]; /* "Replace xaax x1x x...x." 
*/ 811 UChar text2[80]; /* "No match Here" */ 812 UChar replText[80]; /* "<$1>" */ 813 UChar replText2[80]; /* "<<$1>>" */ 814 const char * pattern = "x(.*?)x"; 815 const char * expectedResult = "Replace <aa> <1> <...>."; 816 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; 817 UChar buf[80]; 818 int32_t resultSize; 819 int32_t expectedResultSize; 820 int32_t expectedResultSize2; 821 int32_t i; 822 823 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 824 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 825 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 826 u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); 827 expectedResultSize = strlen(expectedResult); 828 expectedResultSize2 = strlen(expectedResult2); 829 830 status = U_ZERO_ERROR; 831 re = uregex_openC(pattern, 0, NULL, &status); 832 TEST_ASSERT_SUCCESS(status); 833 834 /* Normal case, with match */ 835 uregex_setText(re, text1, -1, &status); 836 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 837 TEST_ASSERT_SUCCESS(status); 838 TEST_ASSERT_STRING(expectedResult, buf, TRUE); 839 TEST_ASSERT(resultSize == expectedResultSize); 840 841 /* No match. Text should copy to output with no changes. */ 842 status = U_ZERO_ERROR; 843 uregex_setText(re, text2, -1, &status); 844 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 845 TEST_ASSERT_SUCCESS(status); 846 TEST_ASSERT_STRING("No match here.", buf, TRUE); 847 TEST_ASSERT(resultSize == u_strlen(text2)); 848 849 /* Match, output just fills buffer, no termination warning. 
*/ 850 status = U_ZERO_ERROR; 851 uregex_setText(re, text1, -1, &status); 852 memset(buf, -1, sizeof(buf)); 853 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status); 854 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 855 TEST_ASSERT_STRING(expectedResult, buf, FALSE); 856 TEST_ASSERT(resultSize == expectedResultSize); 857 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 858 859 /* Do the replaceFirst again, without first resetting anything. 860 * Should give the same results. 861 */ 862 status = U_ZERO_ERROR; 863 memset(buf, -1, sizeof(buf)); 864 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); 865 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 866 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); 867 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 868 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 869 870 /* NULL buffer, zero buffer length */ 871 status = U_ZERO_ERROR; 872 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); 873 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 874 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 875 876 /* Buffer too small. Try every size, which will tickle edge cases 877 * in uregex_appendReplacement (used by replaceAll) */ 878 for (i=0; i<expectedResultSize; i++) { 879 char expected[80]; 880 status = U_ZERO_ERROR; 881 memset(buf, -1, sizeof(buf)); 882 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); 883 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 884 strcpy(expected, expectedResult); 885 expected[i] = 0; 886 TEST_ASSERT_STRING(expected, buf, FALSE); 887 TEST_ASSERT(resultSize == expectedResultSize); 888 TEST_ASSERT(buf[i] == (UChar)0xffff); 889 } 890 891 /* Buffer too small. 
Same as previous test, except this time the replacement 892 * text is longer than the match capture group, making the length of the complete 893 * replacement longer than the original string. 894 */ 895 for (i=0; i<expectedResultSize2; i++) { 896 char expected[80]; 897 status = U_ZERO_ERROR; 898 memset(buf, -1, sizeof(buf)); 899 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); 900 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 901 strcpy(expected, expectedResult2); 902 expected[i] = 0; 903 TEST_ASSERT_STRING(expected, buf, FALSE); 904 TEST_ASSERT(resultSize == expectedResultSize2); 905 TEST_ASSERT(buf[i] == (UChar)0xffff); 906 } 907 908 909 uregex_close(re); 910 } 911 912 913 /* 914 * appendReplacement() 915 */ 916 { 917 UChar text[100]; 918 UChar repl[100]; 919 UChar buf[100]; 920 UChar *bufPtr; 921 int32_t bufCap; 922 923 924 status = U_ZERO_ERROR; 925 re = uregex_openC(".*", 0, 0, &status); 926 TEST_ASSERT_SUCCESS(status); 927 928 u_uastrncpy(text, "whatever", sizeof(text)/2); 929 u_uastrncpy(repl, "some other", sizeof(repl)/2); 930 uregex_setText(re, text, -1, &status); 931 932 /* match covers whole target string */ 933 uregex_find(re, 0, &status); 934 TEST_ASSERT_SUCCESS(status); 935 bufPtr = buf; 936 bufCap = sizeof(buf) / 2; 937 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 938 TEST_ASSERT_SUCCESS(status); 939 TEST_ASSERT_STRING("some other", buf, TRUE); 940 941 /* Match has \u \U escapes */ 942 uregex_find(re, 0, &status); 943 TEST_ASSERT_SUCCESS(status); 944 bufPtr = buf; 945 bufCap = sizeof(buf) / 2; 946 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 947 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 948 TEST_ASSERT_SUCCESS(status); 949 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 950 951 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. 
*/ 952 status = U_ZERO_ERROR; 953 uregex_find(re, 0, &status); 954 TEST_ASSERT_SUCCESS(status); 955 bufPtr = buf; 956 status = U_BUFFER_OVERFLOW_ERROR; 957 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); 958 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 959 960 uregex_close(re); 961 } 962 963 964 /* 965 * appendTail(). Checked in ReplaceFirst(), replaceAll(). 966 */ 967 968 /* 969 * split() 970 */ 971 { 972 UChar textToSplit[80]; 973 UChar text2[80]; 974 UChar buf[200]; 975 UChar *fields[10]; 976 int32_t numFields; 977 int32_t requiredCapacity; 978 int32_t spaceNeeded; 979 int32_t sz; 980 981 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 982 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 983 984 status = U_ZERO_ERROR; 985 re = uregex_openC(":", 0, NULL, &status); 986 987 988 /* Simple split */ 989 990 uregex_setText(re, textToSplit, -1, &status); 991 TEST_ASSERT_SUCCESS(status); 992 993 /* The TEST_ASSERT_SUCCESS call above should change too... */ 994 if (U_SUCCESS(status)) { 995 memset(fields, -1, sizeof(fields)); 996 numFields = 997 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 998 TEST_ASSERT_SUCCESS(status); 999 1000 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1001 if(U_SUCCESS(status)) { 1002 TEST_ASSERT(numFields == 3); 1003 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1004 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1005 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1006 TEST_ASSERT(fields[3] == NULL); 1007 1008 spaceNeeded = u_strlen(textToSplit) - 1009 (numFields - 1) + /* Field delimiters do not appear in output */ 1010 numFields; /* Each field gets a NUL terminator */ 1011 1012 TEST_ASSERT(spaceNeeded == requiredCapacity); 1013 } 1014 } 1015 1016 uregex_close(re); 1017 1018 1019 /* Split with too few output strings available */ 1020 status = U_ZERO_ERROR; 1021 re = uregex_openC(":", 0, NULL, &status); 1022 uregex_setText(re, textToSplit, -1, &status); 1023 TEST_ASSERT_SUCCESS(status); 1024 1025 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1026 if(U_SUCCESS(status)) { 1027 memset(fields, -1, sizeof(fields)); 1028 numFields = 1029 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1030 TEST_ASSERT_SUCCESS(status); 1031 1032 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1033 if(U_SUCCESS(status)) { 1034 TEST_ASSERT(numFields == 2); 1035 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1036 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); 1037 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1038 1039 spaceNeeded = u_strlen(textToSplit) - 1040 (numFields - 1) + /* Field delimiters do not appear in output */ 1041 numFields; /* Each field gets a NUL terminator */ 1042 1043 TEST_ASSERT(spaceNeeded == requiredCapacity); 1044 1045 /* Split with a range of output buffer sizes. 
*/ 1046 spaceNeeded = u_strlen(textToSplit) - 1047 (numFields - 1) + /* Field delimiters do not appear in output */ 1048 numFields; /* Each field gets a NUL terminator */ 1049 1050 for (sz=0; sz < spaceNeeded+1; sz++) { 1051 memset(fields, -1, sizeof(fields)); 1052 status = U_ZERO_ERROR; 1053 numFields = 1054 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); 1055 if (sz >= spaceNeeded) { 1056 TEST_ASSERT_SUCCESS(status); 1057 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1058 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1059 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1060 } else { 1061 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1062 } 1063 TEST_ASSERT(numFields == 3); 1064 TEST_ASSERT(fields[3] == NULL); 1065 TEST_ASSERT(spaceNeeded == requiredCapacity); 1066 } 1067 } 1068 } 1069 1070 uregex_close(re); 1071 } 1072 1073 1074 1075 1076 /* Split(), part 2. Patterns with capture groups. The capture group text 1077 * comes out as additional fields. */ 1078 { 1079 UChar textToSplit[80]; 1080 UChar buf[200]; 1081 UChar *fields[10]; 1082 int32_t numFields; 1083 int32_t requiredCapacity; 1084 int32_t spaceNeeded; 1085 int32_t sz; 1086 1087 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 1088 1089 status = U_ZERO_ERROR; 1090 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1091 1092 uregex_setText(re, textToSplit, -1, &status); 1093 TEST_ASSERT_SUCCESS(status); 1094 1095 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1096 if(U_SUCCESS(status)) { 1097 memset(fields, -1, sizeof(fields)); 1098 numFields = 1099 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1100 TEST_ASSERT_SUCCESS(status); 1101 1102 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1103 if(U_SUCCESS(status)) { 1104 TEST_ASSERT(numFields == 5); 1105 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1106 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1107 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1108 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1109 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1110 TEST_ASSERT(fields[5] == NULL); 1111 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1112 TEST_ASSERT(spaceNeeded == requiredCapacity); 1113 } 1114 } 1115 1116 /* Split with too few output strings available (2) */ 1117 status = U_ZERO_ERROR; 1118 memset(fields, -1, sizeof(fields)); 1119 numFields = 1120 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1121 TEST_ASSERT_SUCCESS(status); 1122 1123 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1124 if(U_SUCCESS(status)) { 1125 TEST_ASSERT(numFields == 2); 1126 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1127 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); 1128 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1129 1130 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ 1131 TEST_ASSERT(spaceNeeded == requiredCapacity); 1132 } 1133 1134 /* Split with too few output strings available (3) */ 1135 status = U_ZERO_ERROR; 1136 memset(fields, -1, sizeof(fields)); 1137 numFields = 1138 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); 1139 TEST_ASSERT_SUCCESS(status); 1140 1141 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1142 if(U_SUCCESS(status)) { 1143 TEST_ASSERT(numFields == 3); 1144 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1145 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1146 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); 1147 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); 1148 1149 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." 
at NUL positions */ 1150 TEST_ASSERT(spaceNeeded == requiredCapacity); 1151 } 1152 1153 /* Split with just enough output strings available (5) */ 1154 status = U_ZERO_ERROR; 1155 memset(fields, -1, sizeof(fields)); 1156 numFields = 1157 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); 1158 TEST_ASSERT_SUCCESS(status); 1159 1160 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1161 if(U_SUCCESS(status)) { 1162 TEST_ASSERT(numFields == 5); 1163 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1164 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1165 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1166 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1167 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1168 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); 1169 1170 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1171 TEST_ASSERT(spaceNeeded == requiredCapacity); 1172 } 1173 1174 /* Split, end of text is a field delimiter. */ 1175 status = U_ZERO_ERROR; 1176 sz = strlen("first <tag-a> second<tag-b>"); 1177 uregex_setText(re, textToSplit, sz, &status); 1178 TEST_ASSERT_SUCCESS(status); 1179 1180 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1181 if(U_SUCCESS(status)) { 1182 memset(fields, -1, sizeof(fields)); 1183 numFields = 1184 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); 1185 TEST_ASSERT_SUCCESS(status); 1186 1187 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1188 if(U_SUCCESS(status)) { 1189 TEST_ASSERT(numFields == 4); 1190 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1191 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1192 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1193 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1194 TEST_ASSERT(fields[4] == NULL); 1195 TEST_ASSERT(fields[8] == NULL); 1196 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); 1197 spaceNeeded = strlen("first .tag-a. second.tag-b."); /* "." 
at NUL positions */ 1198 TEST_ASSERT(spaceNeeded == requiredCapacity); 1199 } 1200 } 1201 1202 uregex_close(re); 1203 } 1204 1205 /* 1206 * set/getTimeLimit 1207 */ 1208 TEST_SETUP("abc$", "abcdef", 0); 1209 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); 1210 uregex_setTimeLimit(re, 1000, &status); 1211 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1212 TEST_ASSERT_SUCCESS(status); 1213 uregex_setTimeLimit(re, -1, &status); 1214 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1215 status = U_ZERO_ERROR; 1216 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1217 TEST_TEARDOWN; 1218 1219 /* 1220 * set/get Stack Limit 1221 */ 1222 TEST_SETUP("abc$", "abcdef", 0); 1223 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); 1224 uregex_setStackLimit(re, 40000, &status); 1225 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1226 TEST_ASSERT_SUCCESS(status); 1227 uregex_setStackLimit(re, -1, &status); 1228 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1229 status = U_ZERO_ERROR; 1230 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1231 TEST_TEARDOWN; 1232 1233 1234 /* 1235 * Get/Set callback functions 1236 * This test is copied from intltest regex/Callbacks 1237 * The pattern and test data will run long enough to cause the callback 1238 * to be invoked. The nested '+' operators give exponential time 1239 * behavior with increasing string length. 1240 */ 1241 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) 1242 callBackContext cbInfo = {4, 0, 0}; 1243 const void *pContext = &cbInfo; 1244 URegexMatchCallback *returnedFn = &TestCallbackFn; 1245 1246 /* Getting the callback fn when it hasn't been set must return NULL */ 1247 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1248 TEST_ASSERT_SUCCESS(status); 1249 TEST_ASSERT(returnedFn == NULL); 1250 TEST_ASSERT(pContext == NULL); 1251 1252 /* Set thecallback and do a match. */ 1253 /* The callback function should record that it has been called. 
*/ 1254 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); 1255 TEST_ASSERT_SUCCESS(status); 1256 TEST_ASSERT(cbInfo.numCalls == 0); 1257 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); 1258 TEST_ASSERT_SUCCESS(status); 1259 TEST_ASSERT(cbInfo.numCalls > 0); 1260 1261 /* Getting the callback should return the values that were set above. */ 1262 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1263 TEST_ASSERT(returnedFn == &TestCallbackFn); 1264 TEST_ASSERT(pContext == &cbInfo); 1265 1266 TEST_TEARDOWN; 1267} 1268 1269 1270 1271static void TestBug4315(void) { 1272 UErrorCode theICUError = U_ZERO_ERROR; 1273 URegularExpression *theRegEx; 1274 UChar *textBuff; 1275 const char *thePattern; 1276 UChar theString[100]; 1277 UChar *destFields[24]; 1278 int32_t neededLength1; 1279 int32_t neededLength2; 1280 1281 int32_t wordCount = 0; 1282 int32_t destFieldsSize = 24; 1283 1284 thePattern = "ck "; 1285 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle."); 1286 1287 /* open a regex */ 1288 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError); 1289 TEST_ASSERT_SUCCESS(theICUError); 1290 1291 /* set the input string */ 1292 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError); 1293 TEST_ASSERT_SUCCESS(theICUError); 1294 1295 /* split */ 1296 /*explicitly pass NULL and 0 to force the overflow error -> this is where the 1297 * error occurs! 
*/ 1298 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields, 1299 destFieldsSize, &theICUError); 1300 1301 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR); 1302 TEST_ASSERT(wordCount==3); 1303 1304 if(theICUError == U_BUFFER_OVERFLOW_ERROR) 1305 { 1306 theICUError = U_ZERO_ERROR; 1307 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1)); 1308 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2, 1309 destFields, destFieldsSize, &theICUError); 1310 TEST_ASSERT(wordCount==3); 1311 TEST_ASSERT_SUCCESS(theICUError); 1312 TEST_ASSERT(neededLength1 == neededLength2); 1313 TEST_ASSERT_STRING("The qui", destFields[0], TRUE); 1314 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE); 1315 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE); 1316 TEST_ASSERT(destFields[3] == NULL); 1317 free(textBuff); 1318 } 1319 uregex_close(theRegEx); 1320} 1321 1322#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 1323