1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2004-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/******************************************************************************** 7* 8* File reapits.c 9* 10*********************************************************************************/ 11/*C API TEST FOR Regular Expressions */ 12/** 13* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't 14* try to test the full functionality. It just calls each function and verifies that it 15* works on a basic level. 16* 17* More complete testing of regular expression functionality is done with the C++ tests. 18**/ 19 20#include "unicode/utypes.h" 21 22#if !UCONFIG_NO_REGULAR_EXPRESSIONS 23 24#include <stdlib.h> 25#include <string.h> 26#include "unicode/uloc.h" 27#include "unicode/uregex.h" 28#include "unicode/ustring.h" 29#include "unicode/utext.h" 30#include "cintltst.h" 31#include "cmemory.h" 32 33#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 34log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} 35 36#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 37log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}} 38 39/* 40 * TEST_SETUP and TEST_TEARDOWN 41 * macros to handle the boilerplate around setting up regex test cases. 42 * parameteres to setup: 43 * pattern: The regex pattern, a (char *) null terminated C string. 44 * testString: The string data, also a (char *) C string. 45 * flags: Regex flags to set when compiling the pattern 46 * 47 * Put arbitrary test code between SETUP and TEARDOWN. 48 * 're" is the compiled, ready-to-go regular expression. 
49 */ 50#define TEST_SETUP(pattern, testString, flags) { \ 51 UChar *srcString = NULL; \ 52 status = U_ZERO_ERROR; \ 53 re = uregex_openC(pattern, flags, NULL, &status); \ 54 TEST_ASSERT_SUCCESS(status); \ 55 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \ 56 u_uastrncpy(srcString, testString, strlen(testString)+1); \ 57 uregex_setText(re, srcString, -1, &status); \ 58 TEST_ASSERT_SUCCESS(status); \ 59 if (U_SUCCESS(status)) { 60 61#define TEST_TEARDOWN \ 62 } \ 63 TEST_ASSERT_SUCCESS(status); \ 64 uregex_close(re); \ 65 free(srcString); \ 66 } 67 68 69/** 70 * @param expected utf-8 array of bytes to be expected 71 */ 72static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 73 char buf_inside_macro[120]; 74 int32_t len = (int32_t)strlen(expected); 75 UBool success; 76 if (nulTerm) { 77 u_austrncpy(buf_inside_macro, (actual), len+1); 78 buf_inside_macro[len+2] = 0; 79 success = (strcmp((expected), buf_inside_macro) == 0); 80 } else { 81 u_austrncpy(buf_inside_macro, (actual), len); 82 buf_inside_macro[len+1] = 0; 83 success = (strncmp((expected), buf_inside_macro, len) == 0); 84 } 85 if (success == FALSE) { 86 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 87 file, line, (expected), buf_inside_macro); 88 } 89} 90 91#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 92 93 94static UBool equals_utf8_utext(const char *utf8, UText *utext) { 95 int32_t u8i = 0; 96 UChar32 u8c = 0; 97 UChar32 utc = 0; 98 UBool stringsEqual = TRUE; 99 utext_setNativeIndex(utext, 0); 100 for (;;) { 101 U8_NEXT_UNSAFE(utf8, u8i, u8c); 102 utc = utext_next32(utext); 103 if (u8c == 0 && utc == U_SENTINEL) { 104 break; 105 } 106 if (u8c != utc || u8c == 0) { 107 stringsEqual = FALSE; 108 break; 109 } 110 } 111 return stringsEqual; 112} 113 114 115static void test_assert_utext(const char *expected, UText *actual, const 
char *file, int line) { 116 utext_setNativeIndex(actual, 0); 117 if (!equals_utf8_utext(expected, actual)) { 118 UChar32 c; 119 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected); 120 c = utext_next32From(actual, 0); 121 while (c != U_SENTINEL) { 122 if (0x20<c && c <0x7e) { 123 log_err("%c", c); 124 } else { 125 log_err("%#x", c); 126 } 127 c = UTEXT_NEXT32(actual); 128 } 129 log_err("\"\n"); 130 } 131} 132 133/* 134 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual) 135 * Note: Expected is a UTF-8 encoded string, _not_ the system code page. 136 */ 137#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__) 138 139static UBool testUTextEqual(UText *uta, UText *utb) { 140 UChar32 ca = 0; 141 UChar32 cb = 0; 142 utext_setNativeIndex(uta, 0); 143 utext_setNativeIndex(utb, 0); 144 do { 145 ca = utext_next32(uta); 146 cb = utext_next32(utb); 147 if (ca != cb) { 148 break; 149 } 150 } while (ca != U_SENTINEL); 151 return ca == cb; 152} 153 154 155 156 157static void TestRegexCAPI(void); 158static void TestBug4315(void); 159static void TestUTextAPI(void); 160static void TestRefreshInput(void); 161static void TestBug8421(void); 162static void TestBug10815(void); 163 164void addURegexTest(TestNode** root); 165 166void addURegexTest(TestNode** root) 167{ 168 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); 169 addTest(root, &TestBug4315, "regex/TestBug4315"); 170 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); 171 addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); 172 addTest(root, &TestBug8421, "regex/TestBug8421"); 173 addTest(root, &TestBug10815, "regex/TestBug10815"); 174} 175 176/* 177 * Call back function and context struct used for testing 178 * regular expression user callbacks. This test is mostly the same as 179 * the corresponding C++ test in intltest. 
180 */ 181typedef struct callBackContext { 182 int32_t maxCalls; 183 int32_t numCalls; 184 int32_t lastSteps; 185} callBackContext; 186 187static UBool U_EXPORT2 U_CALLCONV 188TestCallbackFn(const void *context, int32_t steps) { 189 callBackContext *info = (callBackContext *)context; 190 if (info->lastSteps+1 != steps) { 191 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps); 192 } 193 info->lastSteps = steps; 194 info->numCalls++; 195 return (info->numCalls < info->maxCalls); 196} 197 198/* 199 * Regular Expression C API Tests 200 */ 201static void TestRegexCAPI(void) { 202 UErrorCode status = U_ZERO_ERROR; 203 URegularExpression *re; 204 UChar pat[200]; 205 UChar *minus1; 206 207 memset(&minus1, -1, sizeof(minus1)); 208 209 /* Mimimalist open/close */ 210 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); 211 re = uregex_open(pat, -1, 0, 0, &status); 212 if (U_FAILURE(status)) { 213 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 214 return; 215 } 216 uregex_close(re); 217 218 /* Open with all flag values set */ 219 status = U_ZERO_ERROR; 220 re = uregex_open(pat, -1, 221 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL, 222 0, &status); 223 TEST_ASSERT_SUCCESS(status); 224 uregex_close(re); 225 226 /* Open with an invalid flag */ 227 status = U_ZERO_ERROR; 228 re = uregex_open(pat, -1, 0x40000000, 0, &status); 229 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 230 uregex_close(re); 231 232 /* Open with an unimplemented flag */ 233 status = U_ZERO_ERROR; 234 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status); 235 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); 236 uregex_close(re); 237 238 /* openC with an invalid parameter */ 239 status = U_ZERO_ERROR; 240 re = uregex_openC(NULL, 241 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, 
&status); 242 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 243 244 /* openC with an invalid parameter */ 245 status = U_USELESS_COLLATOR_ERROR; 246 re = uregex_openC(NULL, 247 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 248 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); 249 250 /* openC open from a C string */ 251 { 252 const UChar *p; 253 int32_t len; 254 status = U_ZERO_ERROR; 255 re = uregex_openC("abc*", 0, 0, &status); 256 TEST_ASSERT_SUCCESS(status); 257 p = uregex_pattern(re, &len, &status); 258 TEST_ASSERT_SUCCESS(status); 259 260 /* The TEST_ASSERT_SUCCESS above should change too... */ 261 if(U_SUCCESS(status)) { 262 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); 263 TEST_ASSERT(u_strcmp(pat, p) == 0); 264 TEST_ASSERT(len==(int32_t)strlen("abc*")); 265 } 266 267 uregex_close(re); 268 269 /* TODO: Open with ParseError parameter */ 270 } 271 272 /* 273 * clone 274 */ 275 { 276 URegularExpression *clone1; 277 URegularExpression *clone2; 278 URegularExpression *clone3; 279 UChar testString1[30]; 280 UChar testString2[30]; 281 UBool result; 282 283 284 status = U_ZERO_ERROR; 285 re = uregex_openC("abc*", 0, 0, &status); 286 TEST_ASSERT_SUCCESS(status); 287 clone1 = uregex_clone(re, &status); 288 TEST_ASSERT_SUCCESS(status); 289 TEST_ASSERT(clone1 != NULL); 290 291 status = U_ZERO_ERROR; 292 clone2 = uregex_clone(re, &status); 293 TEST_ASSERT_SUCCESS(status); 294 TEST_ASSERT(clone2 != NULL); 295 uregex_close(re); 296 297 status = U_ZERO_ERROR; 298 clone3 = uregex_clone(clone2, &status); 299 TEST_ASSERT_SUCCESS(status); 300 TEST_ASSERT(clone3 != NULL); 301 302 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); 303 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); 304 305 status = U_ZERO_ERROR; 306 uregex_setText(clone1, testString1, -1, &status); 307 TEST_ASSERT_SUCCESS(status); 308 result = uregex_lookingAt(clone1, 0, &status); 309 
TEST_ASSERT_SUCCESS(status); 310 TEST_ASSERT(result==TRUE); 311 312 status = U_ZERO_ERROR; 313 uregex_setText(clone2, testString2, -1, &status); 314 TEST_ASSERT_SUCCESS(status); 315 result = uregex_lookingAt(clone2, 0, &status); 316 TEST_ASSERT_SUCCESS(status); 317 TEST_ASSERT(result==FALSE); 318 result = uregex_find(clone2, 0, &status); 319 TEST_ASSERT_SUCCESS(status); 320 TEST_ASSERT(result==TRUE); 321 322 uregex_close(clone1); 323 uregex_close(clone2); 324 uregex_close(clone3); 325 326 } 327 328 /* 329 * pattern() 330 */ 331 { 332 const UChar *resultPat; 333 int32_t resultLen; 334 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); 335 status = U_ZERO_ERROR; 336 re = uregex_open(pat, -1, 0, NULL, &status); 337 resultPat = uregex_pattern(re, &resultLen, &status); 338 TEST_ASSERT_SUCCESS(status); 339 340 /* The TEST_ASSERT_SUCCESS above should change too... */ 341 if (U_SUCCESS(status)) { 342 TEST_ASSERT(resultLen == -1); 343 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 344 } 345 346 uregex_close(re); 347 348 status = U_ZERO_ERROR; 349 re = uregex_open(pat, 3, 0, NULL, &status); 350 resultPat = uregex_pattern(re, &resultLen, &status); 351 TEST_ASSERT_SUCCESS(status); 352 TEST_ASSERT_SUCCESS(status); 353 354 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 355 if (U_SUCCESS(status)) { 356 TEST_ASSERT(resultLen == 3); 357 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 358 TEST_ASSERT(u_strlen(resultPat) == 3); 359 } 360 361 uregex_close(re); 362 } 363 364 /* 365 * flags() 366 */ 367 { 368 int32_t t; 369 370 status = U_ZERO_ERROR; 371 re = uregex_open(pat, -1, 0, NULL, &status); 372 t = uregex_flags(re, &status); 373 TEST_ASSERT_SUCCESS(status); 374 TEST_ASSERT(t == 0); 375 uregex_close(re); 376 377 status = U_ZERO_ERROR; 378 re = uregex_open(pat, -1, 0, NULL, &status); 379 t = uregex_flags(re, &status); 380 TEST_ASSERT_SUCCESS(status); 381 TEST_ASSERT(t == 0); 382 uregex_close(re); 383 384 status = U_ZERO_ERROR; 385 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); 386 t = uregex_flags(re, &status); 387 TEST_ASSERT_SUCCESS(status); 388 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); 389 uregex_close(re); 390 } 391 392 /* 393 * setText() and lookingAt() 394 */ 395 { 396 UChar text1[50]; 397 UChar text2[50]; 398 UBool result; 399 400 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); 401 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); 402 status = U_ZERO_ERROR; 403 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 404 re = uregex_open(pat, -1, 0, NULL, &status); 405 TEST_ASSERT_SUCCESS(status); 406 407 /* Operation before doing a setText should fail... 
*/ 408 status = U_ZERO_ERROR; 409 uregex_lookingAt(re, 0, &status); 410 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 411 412 status = U_ZERO_ERROR; 413 uregex_setText(re, text1, -1, &status); 414 result = uregex_lookingAt(re, 0, &status); 415 TEST_ASSERT(result == TRUE); 416 TEST_ASSERT_SUCCESS(status); 417 418 status = U_ZERO_ERROR; 419 uregex_setText(re, text2, -1, &status); 420 result = uregex_lookingAt(re, 0, &status); 421 TEST_ASSERT(result == FALSE); 422 TEST_ASSERT_SUCCESS(status); 423 424 status = U_ZERO_ERROR; 425 uregex_setText(re, text1, -1, &status); 426 result = uregex_lookingAt(re, 0, &status); 427 TEST_ASSERT(result == TRUE); 428 TEST_ASSERT_SUCCESS(status); 429 430 status = U_ZERO_ERROR; 431 uregex_setText(re, text1, 5, &status); 432 result = uregex_lookingAt(re, 0, &status); 433 TEST_ASSERT(result == FALSE); 434 TEST_ASSERT_SUCCESS(status); 435 436 status = U_ZERO_ERROR; 437 uregex_setText(re, text1, 6, &status); 438 result = uregex_lookingAt(re, 0, &status); 439 TEST_ASSERT(result == TRUE); 440 TEST_ASSERT_SUCCESS(status); 441 442 uregex_close(re); 443 } 444 445 446 /* 447 * getText() 448 */ 449 { 450 UChar text1[50]; 451 UChar text2[50]; 452 const UChar *result; 453 int32_t textLength; 454 455 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); 456 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); 457 status = U_ZERO_ERROR; 458 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 459 re = uregex_open(pat, -1, 0, NULL, &status); 460 461 uregex_setText(re, text1, -1, &status); 462 result = uregex_getText(re, &textLength, &status); 463 TEST_ASSERT(result == text1); 464 TEST_ASSERT(textLength == -1); 465 TEST_ASSERT_SUCCESS(status); 466 467 status = U_ZERO_ERROR; 468 uregex_setText(re, text2, 7, &status); 469 result = uregex_getText(re, &textLength, &status); 470 TEST_ASSERT(result == text2); 471 TEST_ASSERT(textLength == 7); 472 TEST_ASSERT_SUCCESS(status); 473 474 status = U_ZERO_ERROR; 475 uregex_setText(re, text2, 4, &status); 476 result = 
uregex_getText(re, &textLength, &status); 477 TEST_ASSERT(result == text2); 478 TEST_ASSERT(textLength == 4); 479 TEST_ASSERT_SUCCESS(status); 480 uregex_close(re); 481 } 482 483 /* 484 * matches() 485 */ 486 { 487 UChar text1[50]; 488 UBool result; 489 int len; 490 UChar nullString[] = {0,0,0}; 491 492 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1)); 493 status = U_ZERO_ERROR; 494 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 495 re = uregex_open(pat, -1, 0, NULL, &status); 496 497 uregex_setText(re, text1, -1, &status); 498 result = uregex_matches(re, 0, &status); 499 TEST_ASSERT(result == FALSE); 500 TEST_ASSERT_SUCCESS(status); 501 502 status = U_ZERO_ERROR; 503 uregex_setText(re, text1, 6, &status); 504 result = uregex_matches(re, 0, &status); 505 TEST_ASSERT(result == TRUE); 506 TEST_ASSERT_SUCCESS(status); 507 508 status = U_ZERO_ERROR; 509 uregex_setText(re, text1, 6, &status); 510 result = uregex_matches(re, 1, &status); 511 TEST_ASSERT(result == FALSE); 512 TEST_ASSERT_SUCCESS(status); 513 uregex_close(re); 514 515 status = U_ZERO_ERROR; 516 re = uregex_openC(".?", 0, NULL, &status); 517 uregex_setText(re, text1, -1, &status); 518 len = u_strlen(text1); 519 result = uregex_matches(re, len, &status); 520 TEST_ASSERT(result == TRUE); 521 TEST_ASSERT_SUCCESS(status); 522 523 status = U_ZERO_ERROR; 524 uregex_setText(re, nullString, -1, &status); 525 TEST_ASSERT_SUCCESS(status); 526 result = uregex_matches(re, 0, &status); 527 TEST_ASSERT(result == TRUE); 528 TEST_ASSERT_SUCCESS(status); 529 uregex_close(re); 530 } 531 532 533 /* 534 * lookingAt() Used in setText test. 
535 */ 536 537 538 /* 539 * find(), findNext, start, end, reset 540 */ 541 { 542 UChar text1[50]; 543 UBool result; 544 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); 545 status = U_ZERO_ERROR; 546 re = uregex_openC("rx", 0, NULL, &status); 547 548 uregex_setText(re, text1, -1, &status); 549 result = uregex_find(re, 0, &status); 550 TEST_ASSERT(result == TRUE); 551 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 552 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 553 TEST_ASSERT_SUCCESS(status); 554 555 result = uregex_find(re, 9, &status); 556 TEST_ASSERT(result == TRUE); 557 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 558 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 559 TEST_ASSERT_SUCCESS(status); 560 561 result = uregex_find(re, 14, &status); 562 TEST_ASSERT(result == FALSE); 563 TEST_ASSERT_SUCCESS(status); 564 565 status = U_ZERO_ERROR; 566 uregex_reset(re, 0, &status); 567 568 result = uregex_findNext(re, &status); 569 TEST_ASSERT(result == TRUE); 570 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 571 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 572 TEST_ASSERT_SUCCESS(status); 573 574 result = uregex_findNext(re, &status); 575 TEST_ASSERT(result == TRUE); 576 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 577 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 578 TEST_ASSERT_SUCCESS(status); 579 580 status = U_ZERO_ERROR; 581 uregex_reset(re, 12, &status); 582 583 result = uregex_findNext(re, &status); 584 TEST_ASSERT(result == TRUE); 585 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 586 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 587 TEST_ASSERT_SUCCESS(status); 588 589 result = uregex_findNext(re, &status); 590 TEST_ASSERT(result == FALSE); 591 TEST_ASSERT_SUCCESS(status); 592 593 uregex_close(re); 594 } 595 596 /* 597 * groupCount 598 */ 599 { 600 int32_t result; 601 602 status = U_ZERO_ERROR; 603 re = uregex_openC("abc", 0, NULL, &status); 604 result = uregex_groupCount(re, &status); 605 TEST_ASSERT_SUCCESS(status); 606 
TEST_ASSERT(result == 0); 607 uregex_close(re); 608 609 status = U_ZERO_ERROR; 610 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); 611 result = uregex_groupCount(re, &status); 612 TEST_ASSERT_SUCCESS(status); 613 TEST_ASSERT(result == 3); 614 uregex_close(re); 615 616 } 617 618 619 /* 620 * group() 621 */ 622 { 623 UChar text1[80]; 624 UChar buf[80]; 625 UBool result; 626 int32_t resultSz; 627 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1)); 628 629 status = U_ZERO_ERROR; 630 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 631 TEST_ASSERT_SUCCESS(status); 632 633 634 uregex_setText(re, text1, -1, &status); 635 result = uregex_find(re, 0, &status); 636 TEST_ASSERT(result==TRUE); 637 638 /* Capture Group 0, the full match. Should succeed. */ 639 status = U_ZERO_ERROR; 640 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status); 641 TEST_ASSERT_SUCCESS(status); 642 TEST_ASSERT_STRING("abc interior def", buf, TRUE); 643 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 644 645 /* Capture group #1. Should succeed. */ 646 status = U_ZERO_ERROR; 647 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status); 648 TEST_ASSERT_SUCCESS(status); 649 TEST_ASSERT_STRING(" interior ", buf, TRUE); 650 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); 651 652 /* Capture group out of range. Error. 
*/ 653 status = U_ZERO_ERROR; 654 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status); 655 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 656 657 /* NULL buffer, pure pre-flight */ 658 status = U_ZERO_ERROR; 659 resultSz = uregex_group(re, 0, NULL, 0, &status); 660 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 661 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 662 663 /* Too small buffer, truncated string */ 664 status = U_ZERO_ERROR; 665 memset(buf, -1, sizeof(buf)); 666 resultSz = uregex_group(re, 0, buf, 5, &status); 667 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 668 TEST_ASSERT_STRING("abc i", buf, FALSE); 669 TEST_ASSERT(buf[5] == (UChar)0xffff); 670 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 671 672 /* Output string just fits buffer, no NUL term. */ 673 status = U_ZERO_ERROR; 674 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); 675 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 676 TEST_ASSERT_STRING("abc interior def", buf, FALSE); 677 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 678 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); 679 680 uregex_close(re); 681 682 } 683 684 /* 685 * Regions 686 */ 687 688 689 /* SetRegion(), getRegion() do something */ 690 TEST_SETUP(".*", "0123456789ABCDEF", 0) 691 UChar resultString[40]; 692 TEST_ASSERT(uregex_regionStart(re, &status) == 0); 693 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); 694 uregex_setRegion(re, 3, 6, &status); 695 TEST_ASSERT(uregex_regionStart(re, &status) == 3); 696 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); 697 TEST_ASSERT(uregex_findNext(re, &status)); 698 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3) 699 TEST_ASSERT_STRING("345", resultString, TRUE); 700 TEST_TEARDOWN; 701 702 /* find(start=-1) uses regions */ 703 TEST_SETUP(".*", "0123456789ABCDEF", 0); 704 uregex_setRegion(re, 4, 6, &status); 705 TEST_ASSERT(uregex_find(re, 
-1, &status) == TRUE); 706 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 707 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 708 TEST_TEARDOWN; 709 710 /* find (start >=0) does not use regions */ 711 TEST_SETUP(".*", "0123456789ABCDEF", 0); 712 uregex_setRegion(re, 4, 6, &status); 713 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 714 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 715 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 716 TEST_TEARDOWN; 717 718 /* findNext() obeys regions */ 719 TEST_SETUP(".", "0123456789ABCDEF", 0); 720 uregex_setRegion(re, 4, 6, &status); 721 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); 722 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 723 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); 724 TEST_ASSERT(uregex_start(re, 0, &status) == 5); 725 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); 726 TEST_TEARDOWN; 727 728 /* matches(start=-1) uses regions */ 729 /* Also, verify that non-greedy *? succeeds in finding the full match. */ 730 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 731 uregex_setRegion(re, 4, 6, &status); 732 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); 733 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 734 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 735 TEST_TEARDOWN; 736 737 /* matches (start >=0) does not use regions */ 738 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 739 uregex_setRegion(re, 4, 6, &status); 740 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); 741 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 742 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 743 TEST_TEARDOWN; 744 745 /* lookingAt(start=-1) uses regions */ 746 /* Also, verify that non-greedy *? finds the first (shortest) match. 
*/ 747 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 748 uregex_setRegion(re, 4, 6, &status); 749 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); 750 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 751 TEST_ASSERT(uregex_end(re, 0, &status) == 4); 752 TEST_TEARDOWN; 753 754 /* lookingAt (start >=0) does not use regions */ 755 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 756 uregex_setRegion(re, 4, 6, &status); 757 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); 758 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 759 TEST_ASSERT(uregex_end(re, 0, &status) == 0); 760 TEST_TEARDOWN; 761 762 /* hitEnd() */ 763 TEST_SETUP("[a-f]*", "abcdefghij", 0); 764 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 765 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); 766 TEST_TEARDOWN; 767 768 TEST_SETUP("[a-f]*", "abcdef", 0); 769 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 770 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); 771 TEST_TEARDOWN; 772 773 /* requireEnd */ 774 TEST_SETUP("abcd", "abcd", 0); 775 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 776 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); 777 TEST_TEARDOWN; 778 779 TEST_SETUP("abcd$", "abcd", 0); 780 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 781 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); 782 TEST_TEARDOWN; 783 784 /* anchoringBounds */ 785 TEST_SETUP("abc$", "abcdef", 0); 786 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); 787 uregex_useAnchoringBounds(re, FALSE, &status); 788 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); 789 790 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); 791 uregex_useAnchoringBounds(re, TRUE, &status); 792 uregex_setRegion(re, 0, 3, &status); 793 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 794 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 795 TEST_TEARDOWN; 796 797 /* Transparent Bounds */ 798 TEST_SETUP("abc(?=def)", "abcdef", 0); 799 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); 800 
uregex_useTransparentBounds(re, TRUE, &status); 801 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); 802 803 uregex_useTransparentBounds(re, FALSE, &status); 804 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ 805 uregex_setRegion(re, 0, 3, &status); 806 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */ 807 uregex_useTransparentBounds(re, TRUE, &status); 808 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */ 809 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 810 TEST_TEARDOWN; 811 812 813 /* 814 * replaceFirst() 815 */ 816 { 817 UChar text1[80]; 818 UChar text2[80]; 819 UChar replText[80]; 820 UChar buf[80]; 821 int32_t resultSz; 822 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 823 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 824 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); 825 826 status = U_ZERO_ERROR; 827 re = uregex_openC("x(.*?)x", 0, NULL, &status); 828 TEST_ASSERT_SUCCESS(status); 829 830 /* Normal case, with match */ 831 uregex_setText(re, text1, -1, &status); 832 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 833 TEST_ASSERT_SUCCESS(status); 834 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); 835 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 836 837 /* No match. Text should copy to output with no changes. */ 838 status = U_ZERO_ERROR; 839 uregex_setText(re, text2, -1, &status); 840 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 841 TEST_ASSERT_SUCCESS(status); 842 TEST_ASSERT_STRING("No match here.", buf, TRUE); 843 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); 844 845 /* Match, output just fills buffer, no termination warning. 
*/ 846 status = U_ZERO_ERROR; 847 uregex_setText(re, text1, -1, &status); 848 memset(buf, -1, sizeof(buf)); 849 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 850 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 851 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 852 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 853 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 854 855 /* Do the replaceFirst again, without first resetting anything. 856 * Should give the same results. 857 */ 858 status = U_ZERO_ERROR; 859 memset(buf, -1, sizeof(buf)); 860 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 861 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 862 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 863 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 864 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 865 866 /* NULL buffer, zero buffer length */ 867 status = U_ZERO_ERROR; 868 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); 869 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 870 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 871 872 /* Buffer too small by one */ 873 status = U_ZERO_ERROR; 874 memset(buf, -1, sizeof(buf)); 875 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); 876 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 877 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); 878 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 879 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 880 881 uregex_close(re); 882 } 883 884 885 /* 886 * replaceAll() 887 */ 888 { 889 UChar text1[80]; /* "Replace xaax x1x x...x." 
*/ 890 UChar text2[80]; /* "No match Here" */ 891 UChar replText[80]; /* "<$1>" */ 892 UChar replText2[80]; /* "<<$1>>" */ 893 const char * pattern = "x(.*?)x"; 894 const char * expectedResult = "Replace <aa> <1> <...>."; 895 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; 896 UChar buf[80]; 897 int32_t resultSize; 898 int32_t expectedResultSize; 899 int32_t expectedResultSize2; 900 int32_t i; 901 902 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 903 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 904 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); 905 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2)); 906 expectedResultSize = strlen(expectedResult); 907 expectedResultSize2 = strlen(expectedResult2); 908 909 status = U_ZERO_ERROR; 910 re = uregex_openC(pattern, 0, NULL, &status); 911 TEST_ASSERT_SUCCESS(status); 912 913 /* Normal case, with match */ 914 uregex_setText(re, text1, -1, &status); 915 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 916 TEST_ASSERT_SUCCESS(status); 917 TEST_ASSERT_STRING(expectedResult, buf, TRUE); 918 TEST_ASSERT(resultSize == expectedResultSize); 919 920 /* No match. Text should copy to output with no changes. */ 921 status = U_ZERO_ERROR; 922 uregex_setText(re, text2, -1, &status); 923 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 924 TEST_ASSERT_SUCCESS(status); 925 TEST_ASSERT_STRING("No match here.", buf, TRUE); 926 TEST_ASSERT(resultSize == u_strlen(text2)); 927 928 /* Match, output just fills buffer, no termination warning. 
*/ 929 status = U_ZERO_ERROR; 930 uregex_setText(re, text1, -1, &status); 931 memset(buf, -1, sizeof(buf)); 932 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status); 933 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 934 TEST_ASSERT_STRING(expectedResult, buf, FALSE); 935 TEST_ASSERT(resultSize == expectedResultSize); 936 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 937 938 /* Do the replaceFirst again, without first resetting anything. 939 * Should give the same results. 940 */ 941 status = U_ZERO_ERROR; 942 memset(buf, -1, sizeof(buf)); 943 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); 944 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 945 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); 946 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 947 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 948 949 /* NULL buffer, zero buffer length */ 950 status = U_ZERO_ERROR; 951 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); 952 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 953 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 954 955 /* Buffer too small. Try every size, which will tickle edge cases 956 * in uregex_appendReplacement (used by replaceAll) */ 957 for (i=0; i<expectedResultSize; i++) { 958 char expected[80]; 959 status = U_ZERO_ERROR; 960 memset(buf, -1, sizeof(buf)); 961 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); 962 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 963 strcpy(expected, expectedResult); 964 expected[i] = 0; 965 TEST_ASSERT_STRING(expected, buf, FALSE); 966 TEST_ASSERT(resultSize == expectedResultSize); 967 TEST_ASSERT(buf[i] == (UChar)0xffff); 968 } 969 970 /* Buffer too small. 
Same as previous test, except this time the replacement 971 * text is longer than the match capture group, making the length of the complete 972 * replacement longer than the original string. 973 */ 974 for (i=0; i<expectedResultSize2; i++) { 975 char expected[80]; 976 status = U_ZERO_ERROR; 977 memset(buf, -1, sizeof(buf)); 978 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); 979 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 980 strcpy(expected, expectedResult2); 981 expected[i] = 0; 982 TEST_ASSERT_STRING(expected, buf, FALSE); 983 TEST_ASSERT(resultSize == expectedResultSize2); 984 TEST_ASSERT(buf[i] == (UChar)0xffff); 985 } 986 987 988 uregex_close(re); 989 } 990 991 992 /* 993 * appendReplacement() 994 */ 995 { 996 UChar text[100]; 997 UChar repl[100]; 998 UChar buf[100]; 999 UChar *bufPtr; 1000 int32_t bufCap; 1001 1002 1003 status = U_ZERO_ERROR; 1004 re = uregex_openC(".*", 0, 0, &status); 1005 TEST_ASSERT_SUCCESS(status); 1006 1007 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); 1008 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); 1009 uregex_setText(re, text, -1, &status); 1010 1011 /* match covers whole target string */ 1012 uregex_find(re, 0, &status); 1013 TEST_ASSERT_SUCCESS(status); 1014 bufPtr = buf; 1015 bufCap = UPRV_LENGTHOF(buf); 1016 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1017 TEST_ASSERT_SUCCESS(status); 1018 TEST_ASSERT_STRING("some other", buf, TRUE); 1019 1020 /* Match has \u \U escapes */ 1021 uregex_find(re, 0, &status); 1022 TEST_ASSERT_SUCCESS(status); 1023 bufPtr = buf; 1024 bufCap = UPRV_LENGTHOF(buf); 1025 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl)); 1026 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1027 TEST_ASSERT_SUCCESS(status); 1028 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1029 1030 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. 
*/ 1031 status = U_ZERO_ERROR; 1032 uregex_find(re, 0, &status); 1033 TEST_ASSERT_SUCCESS(status); 1034 bufPtr = buf; 1035 status = U_BUFFER_OVERFLOW_ERROR; 1036 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); 1037 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1038 1039 uregex_close(re); 1040 } 1041 1042 1043 /* 1044 * appendTail(). Checked in ReplaceFirst(), replaceAll(). 1045 */ 1046 1047 /* 1048 * split() 1049 */ 1050 { 1051 UChar textToSplit[80]; 1052 UChar text2[80]; 1053 UChar buf[200]; 1054 UChar *fields[10]; 1055 int32_t numFields; 1056 int32_t requiredCapacity; 1057 int32_t spaceNeeded; 1058 int32_t sz; 1059 1060 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit)); 1061 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1062 1063 status = U_ZERO_ERROR; 1064 re = uregex_openC(":", 0, NULL, &status); 1065 1066 1067 /* Simple split */ 1068 1069 uregex_setText(re, textToSplit, -1, &status); 1070 TEST_ASSERT_SUCCESS(status); 1071 1072 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1073 if (U_SUCCESS(status)) { 1074 memset(fields, -1, sizeof(fields)); 1075 numFields = 1076 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status); 1077 TEST_ASSERT_SUCCESS(status); 1078 1079 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1080 if(U_SUCCESS(status)) { 1081 TEST_ASSERT(numFields == 3); 1082 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1083 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1084 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1085 TEST_ASSERT(fields[3] == NULL); 1086 1087 spaceNeeded = u_strlen(textToSplit) - 1088 (numFields - 1) + /* Field delimiters do not appear in output */ 1089 numFields; /* Each field gets a NUL terminator */ 1090 1091 TEST_ASSERT(spaceNeeded == requiredCapacity); 1092 } 1093 } 1094 1095 uregex_close(re); 1096 1097 1098 /* Split with too few output strings available */ 1099 status = U_ZERO_ERROR; 1100 re = uregex_openC(":", 0, NULL, &status); 1101 uregex_setText(re, textToSplit, -1, &status); 1102 TEST_ASSERT_SUCCESS(status); 1103 1104 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1105 if(U_SUCCESS(status)) { 1106 memset(fields, -1, sizeof(fields)); 1107 numFields = 1108 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status); 1109 TEST_ASSERT_SUCCESS(status); 1110 1111 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1112 if(U_SUCCESS(status)) { 1113 TEST_ASSERT(numFields == 2); 1114 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1115 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); 1116 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1117 1118 spaceNeeded = u_strlen(textToSplit) - 1119 (numFields - 1) + /* Field delimiters do not appear in output */ 1120 numFields; /* Each field gets a NUL terminator */ 1121 1122 TEST_ASSERT(spaceNeeded == requiredCapacity); 1123 1124 /* Split with a range of output buffer sizes. 
*/ 1125 spaceNeeded = u_strlen(textToSplit) - 1126 (numFields - 1) + /* Field delimiters do not appear in output */ 1127 numFields; /* Each field gets a NUL terminator */ 1128 1129 for (sz=0; sz < spaceNeeded+1; sz++) { 1130 memset(fields, -1, sizeof(fields)); 1131 status = U_ZERO_ERROR; 1132 numFields = 1133 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); 1134 if (sz >= spaceNeeded) { 1135 TEST_ASSERT_SUCCESS(status); 1136 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1137 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1138 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1139 } else { 1140 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1141 } 1142 TEST_ASSERT(numFields == 3); 1143 TEST_ASSERT(fields[3] == NULL); 1144 TEST_ASSERT(spaceNeeded == requiredCapacity); 1145 } 1146 } 1147 } 1148 1149 uregex_close(re); 1150 } 1151 1152 1153 1154 1155 /* Split(), part 2. Patterns with capture groups. The capture group text 1156 * comes out as additional fields. */ 1157 { 1158 UChar textToSplit[80]; 1159 UChar buf[200]; 1160 UChar *fields[10]; 1161 int32_t numFields; 1162 int32_t requiredCapacity; 1163 int32_t spaceNeeded; 1164 int32_t sz; 1165 1166 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit)); 1167 1168 status = U_ZERO_ERROR; 1169 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1170 1171 uregex_setText(re, textToSplit, -1, &status); 1172 TEST_ASSERT_SUCCESS(status); 1173 1174 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1175 if(U_SUCCESS(status)) { 1176 memset(fields, -1, sizeof(fields)); 1177 numFields = 1178 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status); 1179 TEST_ASSERT_SUCCESS(status); 1180 1181 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1182 if(U_SUCCESS(status)) { 1183 TEST_ASSERT(numFields == 5); 1184 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1185 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1186 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1187 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1188 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1189 TEST_ASSERT(fields[5] == NULL); 1190 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1191 TEST_ASSERT(spaceNeeded == requiredCapacity); 1192 } 1193 } 1194 1195 /* Split with too few output strings available (2) */ 1196 status = U_ZERO_ERROR; 1197 memset(fields, -1, sizeof(fields)); 1198 numFields = 1199 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status); 1200 TEST_ASSERT_SUCCESS(status); 1201 1202 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1203 if(U_SUCCESS(status)) { 1204 TEST_ASSERT(numFields == 2); 1205 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1206 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); 1207 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1208 1209 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ 1210 TEST_ASSERT(spaceNeeded == requiredCapacity); 1211 } 1212 1213 /* Split with too few output strings available (3) */ 1214 status = U_ZERO_ERROR; 1215 memset(fields, -1, sizeof(fields)); 1216 numFields = 1217 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status); 1218 TEST_ASSERT_SUCCESS(status); 1219 1220 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1221 if(U_SUCCESS(status)) { 1222 TEST_ASSERT(numFields == 3); 1223 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1224 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1225 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); 1226 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); 1227 1228 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." 
at NUL positions */ 1229 TEST_ASSERT(spaceNeeded == requiredCapacity); 1230 } 1231 1232 /* Split with just enough output strings available (5) */ 1233 status = U_ZERO_ERROR; 1234 memset(fields, -1, sizeof(fields)); 1235 numFields = 1236 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status); 1237 TEST_ASSERT_SUCCESS(status); 1238 1239 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1240 if(U_SUCCESS(status)) { 1241 TEST_ASSERT(numFields == 5); 1242 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1243 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1244 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1245 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1246 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1247 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); 1248 1249 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1250 TEST_ASSERT(spaceNeeded == requiredCapacity); 1251 } 1252 1253 /* Split, end of text is a field delimiter. */ 1254 status = U_ZERO_ERROR; 1255 sz = strlen("first <tag-a> second<tag-b>"); 1256 uregex_setText(re, textToSplit, sz, &status); 1257 TEST_ASSERT_SUCCESS(status); 1258 1259 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1260 if(U_SUCCESS(status)) { 1261 memset(fields, -1, sizeof(fields)); 1262 numFields = 1263 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status); 1264 TEST_ASSERT_SUCCESS(status); 1265 1266 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1267 if(U_SUCCESS(status)) { 1268 TEST_ASSERT(numFields == 5); 1269 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1270 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1271 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1272 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1273 TEST_ASSERT_STRING("", fields[4], TRUE); 1274 TEST_ASSERT(fields[5] == NULL); 1275 TEST_ASSERT(fields[8] == NULL); 1276 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); 1277 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */ 1278 TEST_ASSERT(spaceNeeded == requiredCapacity); 1279 } 1280 } 1281 1282 uregex_close(re); 1283 } 1284 1285 /* 1286 * set/getTimeLimit 1287 */ 1288 TEST_SETUP("abc$", "abcdef", 0); 1289 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); 1290 uregex_setTimeLimit(re, 1000, &status); 1291 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1292 TEST_ASSERT_SUCCESS(status); 1293 uregex_setTimeLimit(re, -1, &status); 1294 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1295 status = U_ZERO_ERROR; 1296 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1297 TEST_TEARDOWN; 1298 1299 /* 1300 * set/get Stack Limit 1301 */ 1302 TEST_SETUP("abc$", "abcdef", 0); 1303 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); 1304 uregex_setStackLimit(re, 40000, &status); 1305 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1306 TEST_ASSERT_SUCCESS(status); 1307 uregex_setStackLimit(re, -1, &status); 1308 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1309 status = U_ZERO_ERROR; 1310 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1311 TEST_TEARDOWN; 1312 1313 1314 /* 1315 * Get/Set callback functions 1316 * This test is copied from intltest regex/Callbacks 1317 * The pattern and test data will run long enough to cause the callback 1318 * to be invoked. The nested '+' operators give exponential time 1319 * behavior with increasing string length. 
1320 */ 1321 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) 1322 callBackContext cbInfo = {4, 0, 0}; 1323 const void *pContext = &cbInfo; 1324 URegexMatchCallback *returnedFn = &TestCallbackFn; 1325 1326 /* Getting the callback fn when it hasn't been set must return NULL */ 1327 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1328 TEST_ASSERT_SUCCESS(status); 1329 TEST_ASSERT(returnedFn == NULL); 1330 TEST_ASSERT(pContext == NULL); 1331 1332 /* Set thecallback and do a match. */ 1333 /* The callback function should record that it has been called. */ 1334 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); 1335 TEST_ASSERT_SUCCESS(status); 1336 TEST_ASSERT(cbInfo.numCalls == 0); 1337 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); 1338 TEST_ASSERT_SUCCESS(status); 1339 TEST_ASSERT(cbInfo.numCalls > 0); 1340 1341 /* Getting the callback should return the values that were set above. */ 1342 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1343 TEST_ASSERT(returnedFn == &TestCallbackFn); 1344 TEST_ASSERT(pContext == &cbInfo); 1345 1346 TEST_TEARDOWN; 1347} 1348 1349 1350
/**
 * Regression test named for ICU bug 4315: uregex_split() invoked with a NULL
 * destination buffer and a capacity of zero.
 *
 * Step 1 splits "The quick brown fox ..." on the pattern "ck " without any
 * output buffer. The call is expected to report U_BUFFER_OVERFLOW_ERROR while
 * still returning the field count (3) and the required capacity.
 * Step 2 allocates a buffer of that capacity (+1), repeats the split, and
 * checks the status, the reported capacity, and the three field strings.
 *
 * Takes no arguments and returns nothing; every failure is reported through
 * the TEST_ASSERT* macros.
 */
static void TestBug4315(void) {
    UErrorCode          theICUError = U_ZERO_ERROR;
    URegularExpression *theRegEx;
    UChar              *textBuff;
    const char         *thePattern;
    UChar               theString[100];
    UChar              *destFields[24];
    /* Zero-initialized so that a failing split never leaves them indeterminate. */
    int32_t             neededLength1 = 0;
    int32_t             neededLength2 = 0;

    int32_t             wordCount = 0;
    /* Derived from the array so the two can never drift apart. */
    int32_t             destFieldsSize = (int32_t)(sizeof(destFields) / sizeof(destFields[0]));

    thePattern = "ck ";
    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");

    /* open a regex */
    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
    TEST_ASSERT_SUCCESS(theICUError);

    /* set the input string */
    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
    TEST_ASSERT_SUCCESS(theICUError);

    /* split */
    /* Explicitly pass NULL and 0 to force the overflow error -> this is where the
     * error occurs! */
    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
                             destFieldsSize, &theICUError);

    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
    TEST_ASSERT(wordCount==3);

    if (theICUError == U_BUFFER_OVERFLOW_ERROR) {
        theICUError = U_ZERO_ERROR;
        textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
        TEST_ASSERT(textBuff != NULL);

        if (textBuff != NULL) {
            /* Same convention as the other split tests in this file: pre-fill the
             * field table with a non-NULL pattern so that the
             * destFields[3] == NULL check below is a real check and does not
             * depend on whatever happened to be on the stack. */
            memset(destFields, -1, sizeof(destFields));

            wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
                                     destFields, destFieldsSize, &theICUError);
            TEST_ASSERT(wordCount==3);
            TEST_ASSERT_SUCCESS(theICUError);

            /* Only look at the fields when the split really produced them;
             * otherwise the pointers are not valid strings and must not be
             * dereferenced. The failure has already been logged above. */
            if (U_SUCCESS(theICUError) && wordCount == 3) {
                TEST_ASSERT(neededLength1 == neededLength2);
                TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
                TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
                TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
                TEST_ASSERT(destFields[3] == NULL);
            }
            free(textBuff);
        }
    }
    uregex_close(theRegEx);
}
1401 1402/* Based on TestRegexCAPI() */ 1403static void TestUTextAPI(void) { 1404 UErrorCode status = U_ZERO_ERROR; 1405 URegularExpression *re; 1406 UText patternText = UTEXT_INITIALIZER; 1407 UChar pat[200]; 1408 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; 1409 1410 /* Mimimalist open/close */ 1411 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); 1412 re = uregex_openUText(&patternText, 0, 0, &status); 1413 if (U_FAILURE(status)) { 1414 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 1415 utext_close(&patternText); 1416 return; 1417 } 1418 uregex_close(re); 1419 1420 /* Open with all flag values set */ 1421 status = U_ZERO_ERROR; 1422 re = uregex_openUText(&patternText, 1423 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 1424 0, &status); 1425 TEST_ASSERT_SUCCESS(status); 1426 uregex_close(re); 1427 1428 /* Open with an invalid flag */ 1429 status = U_ZERO_ERROR; 1430 re =
uregex_openUText(&patternText, 0x40000000, 0, &status); 1431 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 1432 uregex_close(re); 1433 1434 /* open with an invalid parameter */ 1435 status = U_ZERO_ERROR; 1436 re = uregex_openUText(NULL, 1437 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 1438 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 1439 1440 /* 1441 * clone 1442 */ 1443 { 1444 URegularExpression *clone1; 1445 URegularExpression *clone2; 1446 URegularExpression *clone3; 1447 UChar testString1[30]; 1448 UChar testString2[30]; 1449 UBool result; 1450 1451 1452 status = U_ZERO_ERROR; 1453 re = uregex_openUText(&patternText, 0, 0, &status); 1454 TEST_ASSERT_SUCCESS(status); 1455 clone1 = uregex_clone(re, &status); 1456 TEST_ASSERT_SUCCESS(status); 1457 TEST_ASSERT(clone1 != NULL); 1458 1459 status = U_ZERO_ERROR; 1460 clone2 = uregex_clone(re, &status); 1461 TEST_ASSERT_SUCCESS(status); 1462 TEST_ASSERT(clone2 != NULL); 1463 uregex_close(re); 1464 1465 status = U_ZERO_ERROR; 1466 clone3 = uregex_clone(clone2, &status); 1467 TEST_ASSERT_SUCCESS(status); 1468 TEST_ASSERT(clone3 != NULL); 1469 1470 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); 1471 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); 1472 1473 status = U_ZERO_ERROR; 1474 uregex_setText(clone1, testString1, -1, &status); 1475 TEST_ASSERT_SUCCESS(status); 1476 result = uregex_lookingAt(clone1, 0, &status); 1477 TEST_ASSERT_SUCCESS(status); 1478 TEST_ASSERT(result==TRUE); 1479 1480 status = U_ZERO_ERROR; 1481 uregex_setText(clone2, testString2, -1, &status); 1482 TEST_ASSERT_SUCCESS(status); 1483 result = uregex_lookingAt(clone2, 0, &status); 1484 TEST_ASSERT_SUCCESS(status); 1485 TEST_ASSERT(result==FALSE); 1486 result = uregex_find(clone2, 0, &status); 1487 TEST_ASSERT_SUCCESS(status); 1488 TEST_ASSERT(result==TRUE); 1489 1490 uregex_close(clone1); 1491 uregex_close(clone2); 1492 uregex_close(clone3); 1493 1494 
} 1495 1496 /* 1497 * pattern() and patternText() 1498 */ 1499 { 1500 const UChar *resultPat; 1501 int32_t resultLen; 1502 UText *resultText; 1503 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ 1504 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ 1505 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */ 1506 status = U_ZERO_ERROR; 1507 1508 utext_openUTF8(&patternText, str_hello, -1, &status); 1509 re = uregex_open(pat, -1, 0, NULL, &status); 1510 resultPat = uregex_pattern(re, &resultLen, &status); 1511 TEST_ASSERT_SUCCESS(status); 1512 1513 /* The TEST_ASSERT_SUCCESS above should change too... */ 1514 if (U_SUCCESS(status)) { 1515 TEST_ASSERT(resultLen == -1); 1516 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 1517 } 1518 1519 resultText = uregex_patternUText(re, &status); 1520 TEST_ASSERT_SUCCESS(status); 1521 TEST_ASSERT_UTEXT(str_hello, resultText); 1522 1523 uregex_close(re); 1524 1525 status = U_ZERO_ERROR; 1526 re = uregex_open(pat, 3, 0, NULL, &status); 1527 resultPat = uregex_pattern(re, &resultLen, &status); 1528 TEST_ASSERT_SUCCESS(status); 1529 1530 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 1531 if (U_SUCCESS(status)) { 1532 TEST_ASSERT(resultLen == 3); 1533 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 1534 TEST_ASSERT(u_strlen(resultPat) == 3); 1535 } 1536 1537 resultText = uregex_patternUText(re, &status); 1538 TEST_ASSERT_SUCCESS(status); 1539 TEST_ASSERT_UTEXT(str_hel, resultText); 1540 1541 uregex_close(re); 1542 } 1543 1544 /* 1545 * setUText() and lookingAt() 1546 */ 1547 { 1548 UText text1 = UTEXT_INITIALIZER; 1549 UText text2 = UTEXT_INITIALIZER; 1550 UBool result; 1551 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1552 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1553 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1554 status = U_ZERO_ERROR; 1555 utext_openUTF8(&text1, str_abcccd, -1, &status); 1556 utext_openUTF8(&text2, str_abcccxd, -1, &status); 1557 1558 utext_openUTF8(&patternText, str_abcd, -1, &status); 1559 re = uregex_openUText(&patternText, 0, NULL, &status); 1560 TEST_ASSERT_SUCCESS(status); 1561 1562 /* Operation before doing a setText should fail... 
*/ 1563 status = U_ZERO_ERROR; 1564 uregex_lookingAt(re, 0, &status); 1565 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 1566 1567 status = U_ZERO_ERROR; 1568 uregex_setUText(re, &text1, &status); 1569 result = uregex_lookingAt(re, 0, &status); 1570 TEST_ASSERT(result == TRUE); 1571 TEST_ASSERT_SUCCESS(status); 1572 1573 status = U_ZERO_ERROR; 1574 uregex_setUText(re, &text2, &status); 1575 result = uregex_lookingAt(re, 0, &status); 1576 TEST_ASSERT(result == FALSE); 1577 TEST_ASSERT_SUCCESS(status); 1578 1579 status = U_ZERO_ERROR; 1580 uregex_setUText(re, &text1, &status); 1581 result = uregex_lookingAt(re, 0, &status); 1582 TEST_ASSERT(result == TRUE); 1583 TEST_ASSERT_SUCCESS(status); 1584 1585 uregex_close(re); 1586 utext_close(&text1); 1587 utext_close(&text2); 1588 } 1589 1590 1591 /* 1592 * getText() and getUText() 1593 */ 1594 { 1595 UText text1 = UTEXT_INITIALIZER; 1596 UText text2 = UTEXT_INITIALIZER; 1597 UChar text2Chars[20]; 1598 UText *resultText; 1599 const UChar *result; 1600 int32_t textLength; 1601 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1602 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1603 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1604 1605 1606 status = U_ZERO_ERROR; 1607 utext_openUTF8(&text1, str_abcccd, -1, &status); 1608 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars)); 1609 utext_openUChars(&text2, text2Chars, -1, &status); 1610 1611 utext_openUTF8(&patternText, str_abcd, -1, &status); 1612 re = uregex_openUText(&patternText, 0, NULL, &status); 1613 1614 /* First set a UText */ 1615 uregex_setUText(re, &text1, &status); 1616 resultText = uregex_getUText(re, NULL, &status); 1617 TEST_ASSERT_SUCCESS(status); 1618 TEST_ASSERT(resultText != &text1); 1619 utext_setNativeIndex(resultText, 0); 1620 utext_setNativeIndex(&text1, 0); 1621 TEST_ASSERT(testUTextEqual(resultText, &text1)); 1622 
utext_close(resultText); 1623 1624 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */ 1625 (void)result; /* Suppress set but not used warning. */ 1626 TEST_ASSERT(textLength == -1 || textLength == 6); 1627 resultText = uregex_getUText(re, NULL, &status); 1628 TEST_ASSERT_SUCCESS(status); 1629 TEST_ASSERT(resultText != &text1); 1630 utext_setNativeIndex(resultText, 0); 1631 utext_setNativeIndex(&text1, 0); 1632 TEST_ASSERT(testUTextEqual(resultText, &text1)); 1633 utext_close(resultText); 1634 1635 /* Then set a UChar * */ 1636 uregex_setText(re, text2Chars, 7, &status); 1637 resultText = uregex_getUText(re, NULL, &status); 1638 TEST_ASSERT_SUCCESS(status); 1639 utext_setNativeIndex(resultText, 0); 1640 utext_setNativeIndex(&text2, 0); 1641 TEST_ASSERT(testUTextEqual(resultText, &text2)); 1642 utext_close(resultText); 1643 result = uregex_getText(re, &textLength, &status); 1644 TEST_ASSERT(textLength == 7); 1645 1646 uregex_close(re); 1647 utext_close(&text1); 1648 utext_close(&text2); 1649 } 1650 1651 /* 1652 * matches() 1653 */ 1654 { 1655 UText text1 = UTEXT_INITIALIZER; 1656 UBool result; 1657 UText nullText = UTEXT_INITIALIZER; 1658 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */ 1659 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */ 1660 1661 status = U_ZERO_ERROR; 1662 utext_openUTF8(&text1, str_abcccde, -1, &status); 1663 utext_openUTF8(&patternText, str_abcd, -1, &status); 1664 re = uregex_openUText(&patternText, 0, NULL, &status); 1665 1666 uregex_setUText(re, &text1, &status); 1667 result = uregex_matches(re, 0, &status); 1668 TEST_ASSERT(result == FALSE); 1669 TEST_ASSERT_SUCCESS(status); 1670 uregex_close(re); 1671 1672 status = U_ZERO_ERROR; 1673 re = uregex_openC(".?", 0, NULL, &status); 1674 uregex_setUText(re, &text1, &status); 1675 result = uregex_matches(re, 7, &status); 1676 TEST_ASSERT(result == TRUE); 1677 TEST_ASSERT_SUCCESS(status); 1678 
1679 status = U_ZERO_ERROR; 1680 utext_openUTF8(&nullText, "", -1, &status); 1681 uregex_setUText(re, &nullText, &status); 1682 TEST_ASSERT_SUCCESS(status); 1683 result = uregex_matches(re, 0, &status); 1684 TEST_ASSERT(result == TRUE); 1685 TEST_ASSERT_SUCCESS(status); 1686 1687 uregex_close(re); 1688 utext_close(&text1); 1689 utext_close(&nullText); 1690 } 1691 1692 1693 /* 1694 * lookingAt() Used in setText test. 1695 */ 1696 1697 1698 /* 1699 * find(), findNext, start, end, reset 1700 */ 1701 { 1702 UChar text1[50]; 1703 UBool result; 1704 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); 1705 status = U_ZERO_ERROR; 1706 re = uregex_openC("rx", 0, NULL, &status); 1707 1708 uregex_setText(re, text1, -1, &status); 1709 result = uregex_find(re, 0, &status); 1710 TEST_ASSERT(result == TRUE); 1711 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1712 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1713 TEST_ASSERT_SUCCESS(status); 1714 1715 result = uregex_find(re, 9, &status); 1716 TEST_ASSERT(result == TRUE); 1717 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 1718 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 1719 TEST_ASSERT_SUCCESS(status); 1720 1721 result = uregex_find(re, 14, &status); 1722 TEST_ASSERT(result == FALSE); 1723 TEST_ASSERT_SUCCESS(status); 1724 1725 status = U_ZERO_ERROR; 1726 uregex_reset(re, 0, &status); 1727 1728 result = uregex_findNext(re, &status); 1729 TEST_ASSERT(result == TRUE); 1730 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1731 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1732 TEST_ASSERT_SUCCESS(status); 1733 1734 result = uregex_findNext(re, &status); 1735 TEST_ASSERT(result == TRUE); 1736 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 1737 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 1738 TEST_ASSERT_SUCCESS(status); 1739 1740 status = U_ZERO_ERROR; 1741 uregex_reset(re, 12, &status); 1742 1743 result = uregex_findNext(re, &status); 1744 TEST_ASSERT(result == TRUE); 1745 TEST_ASSERT(uregex_start(re, 0, 
&status) == 13); 1746 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 1747 TEST_ASSERT_SUCCESS(status); 1748 1749 result = uregex_findNext(re, &status); 1750 TEST_ASSERT(result == FALSE); 1751 TEST_ASSERT_SUCCESS(status); 1752 1753 uregex_close(re); 1754 } 1755 1756 /* 1757 * groupUText() 1758 */ 1759 { 1760 UChar text1[80]; 1761 UText *actual; 1762 UBool result; 1763 int64_t groupLen = 0; 1764 UChar groupBuf[20]; 1765 1766 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1)); 1767 1768 status = U_ZERO_ERROR; 1769 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 1770 TEST_ASSERT_SUCCESS(status); 1771 1772 uregex_setText(re, text1, -1, &status); 1773 result = uregex_find(re, 0, &status); 1774 TEST_ASSERT(result==TRUE); 1775 1776 /* Capture Group 0 with shallow clone API. Should succeed. */ 1777 status = U_ZERO_ERROR; 1778 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status); 1779 TEST_ASSERT_SUCCESS(status); 1780 1781 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */ 1782 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */ 1783 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status); 1784 1785 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE); 1786 utext_close(actual); 1787 1788 /* Capture group #1. Should succeed. */ 1789 status = U_ZERO_ERROR; 1790 1791 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status); 1792 TEST_ASSERT_SUCCESS(status); 1793 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */ 1794 /* (within the string text1) */ 1795 TEST_ASSERT(10 == groupLen); /* length of " interior " */ 1796 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status); 1797 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE); 1798 1799 utext_close(actual); 1800 1801 /* Capture group out of range. Error. 
*/ 1802 status = U_ZERO_ERROR; 1803 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status); 1804 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1805 utext_close(actual); 1806 1807 uregex_close(re); 1808 } 1809 1810 /* 1811 * replaceFirst() 1812 */ 1813 { 1814 UChar text1[80]; 1815 UChar text2[80]; 1816 UText replText = UTEXT_INITIALIZER; 1817 UText *result; 1818 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ 1819 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1820 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 1821 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */ 1822 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1823 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ 1824 status = U_ZERO_ERROR; 1825 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 1826 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1827 utext_openUTF8(&replText, str_1x, -1, &status); 1828 1829 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1830 TEST_ASSERT_SUCCESS(status); 1831 1832 /* Normal case, with match */ 1833 uregex_setText(re, text1, -1, &status); 1834 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1835 TEST_ASSERT_SUCCESS(status); 1836 TEST_ASSERT_UTEXT(str_Replxxx, result); 1837 utext_close(result); 1838 1839 /* No match. Text should copy to output with no changes. 
*/ 1840 uregex_setText(re, text2, -1, &status); 1841 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1842 TEST_ASSERT_SUCCESS(status); 1843 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1844 utext_close(result); 1845 1846 /* Unicode escapes */ 1847 uregex_setText(re, text1, -1, &status); 1848 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); 1849 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1850 TEST_ASSERT_SUCCESS(status); 1851 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); 1852 utext_close(result); 1853 1854 uregex_close(re); 1855 utext_close(&replText); 1856 } 1857 1858 1859 /* 1860 * replaceAll() 1861 */ 1862 { 1863 UChar text1[80]; 1864 UChar text2[80]; 1865 UText replText = UTEXT_INITIALIZER; 1866 UText *result; 1867 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1868 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ 1869 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1870 status = U_ZERO_ERROR; 1871 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 1872 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1873 utext_openUTF8(&replText, str_1, -1, &status); 1874 1875 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1876 TEST_ASSERT_SUCCESS(status); 1877 1878 /* Normal case, with match */ 1879 uregex_setText(re, text1, -1, &status); 1880 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1881 TEST_ASSERT_SUCCESS(status); 1882 TEST_ASSERT_UTEXT(str_Replaceaa1, result); 1883 utext_close(result); 1884 1885 /* No match. Text should copy to output with no changes. 
*/ 1886 uregex_setText(re, text2, -1, &status); 1887 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1888 TEST_ASSERT_SUCCESS(status); 1889 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1890 utext_close(result); 1891 1892 uregex_close(re); 1893 utext_close(&replText); 1894 } 1895 1896 1897 /* 1898 * appendReplacement() 1899 */ 1900 { 1901 UChar text[100]; 1902 UChar repl[100]; 1903 UChar buf[100]; 1904 UChar *bufPtr; 1905 int32_t bufCap; 1906 1907 status = U_ZERO_ERROR; 1908 re = uregex_openC(".*", 0, 0, &status); 1909 TEST_ASSERT_SUCCESS(status); 1910 1911 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); 1912 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); 1913 uregex_setText(re, text, -1, &status); 1914 1915 /* match covers whole target string */ 1916 uregex_find(re, 0, &status); 1917 TEST_ASSERT_SUCCESS(status); 1918 bufPtr = buf; 1919 bufCap = UPRV_LENGTHOF(buf); 1920 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1921 TEST_ASSERT_SUCCESS(status); 1922 TEST_ASSERT_STRING("some other", buf, TRUE); 1923 1924 /* Match has \u \U escapes */ 1925 uregex_find(re, 0, &status); 1926 TEST_ASSERT_SUCCESS(status); 1927 bufPtr = buf; 1928 bufCap = UPRV_LENGTHOF(buf); 1929 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl)); 1930 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1931 TEST_ASSERT_SUCCESS(status); 1932 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1933 1934 uregex_close(re); 1935 } 1936 1937 1938 /* 1939 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(). 
1940 */ 1941 1942 /* 1943 * splitUText() 1944 */ 1945 { 1946 UChar textToSplit[80]; 1947 UChar text2[80]; 1948 UText *fields[10]; 1949 int32_t numFields; 1950 int32_t i; 1951 1952 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit)); 1953 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1954 1955 status = U_ZERO_ERROR; 1956 re = uregex_openC(":", 0, NULL, &status); 1957 1958 1959 /* Simple split */ 1960 1961 uregex_setText(re, textToSplit, -1, &status); 1962 TEST_ASSERT_SUCCESS(status); 1963 1964 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1965 if (U_SUCCESS(status)) { 1966 memset(fields, 0, sizeof(fields)); 1967 numFields = uregex_splitUText(re, fields, 10, &status); 1968 TEST_ASSERT_SUCCESS(status); 1969 1970 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1971 if(U_SUCCESS(status)) { 1972 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */ 1973 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */ 1974 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */ 1975 TEST_ASSERT(numFields == 3); 1976 TEST_ASSERT_UTEXT(str_first, fields[0]); 1977 TEST_ASSERT_UTEXT(str_second, fields[1]); 1978 TEST_ASSERT_UTEXT(str_third, fields[2]); 1979 TEST_ASSERT(fields[3] == NULL); 1980 } 1981 for(i = 0; i < numFields; i++) { 1982 utext_close(fields[i]); 1983 } 1984 } 1985 1986 uregex_close(re); 1987 1988 1989 /* Split with too few output strings available */ 1990 status = U_ZERO_ERROR; 1991 re = uregex_openC(":", 0, NULL, &status); 1992 uregex_setText(re, textToSplit, -1, &status); 1993 TEST_ASSERT_SUCCESS(status); 1994 1995 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1996 if(U_SUCCESS(status)) { 1997 fields[0] = NULL; 1998 fields[1] = NULL; 1999 fields[2] = &patternText; 2000 numFields = uregex_splitUText(re, fields, 2, &status); 2001 TEST_ASSERT_SUCCESS(status); 2002 2003 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2004 if(U_SUCCESS(status)) { 2005 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2006 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */ 2007 TEST_ASSERT(numFields == 2); 2008 TEST_ASSERT_UTEXT(str_first, fields[0]); 2009 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); 2010 TEST_ASSERT(fields[2] == &patternText); 2011 } 2012 for(i = 0; i < numFields; i++) { 2013 utext_close(fields[i]); 2014 } 2015 } 2016 2017 uregex_close(re); 2018 } 2019 2020 /* splitUText(), part 2. Patterns with capture groups. The capture group text 2021 * comes out as additional fields. */ 2022 { 2023 UChar textToSplit[80]; 2024 UText *fields[10]; 2025 int32_t numFields; 2026 int32_t i; 2027 2028 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit)); 2029 2030 status = U_ZERO_ERROR; 2031 re = uregex_openC("<(.*?)>", 0, NULL, &status); 2032 2033 uregex_setText(re, textToSplit, -1, &status); 2034 TEST_ASSERT_SUCCESS(status); 2035 2036 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2037 if(U_SUCCESS(status)) { 2038 memset(fields, 0, sizeof(fields)); 2039 numFields = uregex_splitUText(re, fields, 10, &status); 2040 TEST_ASSERT_SUCCESS(status); 2041 2042 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2043 if(U_SUCCESS(status)) { 2044 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2045 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2046 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2047 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2048 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2049 2050 TEST_ASSERT(numFields == 5); 2051 TEST_ASSERT_UTEXT(str_first, fields[0]); 2052 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2053 TEST_ASSERT_UTEXT(str_second, fields[2]); 2054 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2055 TEST_ASSERT_UTEXT(str_third, fields[4]); 2056 TEST_ASSERT(fields[5] == NULL); 2057 } 2058 for(i = 0; i < numFields; i++) { 2059 utext_close(fields[i]); 2060 } 2061 } 2062 2063 /* Split with too few output strings available (2) */ 2064 status = U_ZERO_ERROR; 2065 fields[0] = NULL; 2066 fields[1] = NULL; 2067 fields[2] = &patternText; 2068 numFields = uregex_splitUText(re, fields, 2, &status); 2069 TEST_ASSERT_SUCCESS(status); 2070 2071 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2072 if(U_SUCCESS(status)) { 2073 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2074 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2075 TEST_ASSERT(numFields == 2); 2076 TEST_ASSERT_UTEXT(str_first, fields[0]); 2077 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); 2078 TEST_ASSERT(fields[2] == &patternText); 2079 } 2080 for(i = 0; i < numFields; i++) { 2081 utext_close(fields[i]); 2082 } 2083 2084 2085 /* Split with too few output strings available (3) */ 2086 status = U_ZERO_ERROR; 2087 fields[0] = NULL; 2088 fields[1] = NULL; 2089 fields[2] = NULL; 2090 fields[3] = &patternText; 2091 numFields = uregex_splitUText(re, fields, 3, &status); 2092 TEST_ASSERT_SUCCESS(status); 2093 2094 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2095 if(U_SUCCESS(status)) { 2096 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2097 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2098 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2099 TEST_ASSERT(numFields == 3); 2100 TEST_ASSERT_UTEXT(str_first, fields[0]); 2101 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2102 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); 2103 TEST_ASSERT(fields[3] == &patternText); 2104 } 2105 for(i = 0; i < numFields; i++) { 2106 utext_close(fields[i]); 2107 } 2108 2109 /* Split with just enough output strings available (5) */ 2110 status = U_ZERO_ERROR; 2111 fields[0] = NULL; 2112 fields[1] = NULL; 2113 fields[2] = NULL; 2114 fields[3] = NULL; 2115 fields[4] = NULL; 2116 fields[5] = &patternText; 2117 numFields = uregex_splitUText(re, fields, 5, &status); 2118 TEST_ASSERT_SUCCESS(status); 2119 2120 
/* The TEST_ASSERT_SUCCESS call above should change too... */ 2121 if(U_SUCCESS(status)) { 2122 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2123 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2124 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2125 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2126 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2127 2128 TEST_ASSERT(numFields == 5); 2129 TEST_ASSERT_UTEXT(str_first, fields[0]); 2130 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2131 TEST_ASSERT_UTEXT(str_second, fields[2]); 2132 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2133 TEST_ASSERT_UTEXT(str_third, fields[4]); 2134 TEST_ASSERT(fields[5] == &patternText); 2135 } 2136 for(i = 0; i < numFields; i++) { 2137 utext_close(fields[i]); 2138 } 2139 2140 /* Split, end of text is a field delimiter. */ 2141 status = U_ZERO_ERROR; 2142 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status); 2143 TEST_ASSERT_SUCCESS(status); 2144 2145 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2146 if(U_SUCCESS(status)) { 2147 memset(fields, 0, sizeof(fields)); 2148 fields[9] = &patternText; 2149 numFields = uregex_splitUText(re, fields, 9, &status); 2150 TEST_ASSERT_SUCCESS(status); 2151 2152 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2153 if(U_SUCCESS(status)) { 2154 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2155 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2156 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2157 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2158 const char str_empty[] = { 0x00 }; 2159 2160 TEST_ASSERT(numFields == 5); 2161 TEST_ASSERT_UTEXT(str_first, fields[0]); 2162 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2163 TEST_ASSERT_UTEXT(str_second, fields[2]); 2164 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2165 TEST_ASSERT_UTEXT(str_empty, fields[4]); 2166 TEST_ASSERT(fields[5] == NULL); 2167 TEST_ASSERT(fields[8] == NULL); 2168 TEST_ASSERT(fields[9] == &patternText); 2169 } 2170 for(i = 0; i < numFields; i++) { 2171 utext_close(fields[i]); 2172 } 2173 } 2174 2175 uregex_close(re); 2176 } 2177 utext_close(&patternText); 2178} 2179 2180 2181static void TestRefreshInput(void) { 2182 /* 2183 * RefreshInput changes out the input of a URegularExpression without 2184 * changing anything else in the match state. Used with Java JNI, 2185 * when Java moves the underlying string storage. This test 2186 * runs a find() loop, moving the text after the first match. 2187 * The right number of matches should still be found. 
2188 */ 2189 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */ 2190 UChar movedStr[] = { 0, 0, 0, 0, 0, 0}; 2191 UErrorCode status = U_ZERO_ERROR; 2192 URegularExpression *re; 2193 UText ut1 = UTEXT_INITIALIZER; 2194 UText ut2 = UTEXT_INITIALIZER; 2195 2196 re = uregex_openC("[ABC]", 0, 0, &status); 2197 TEST_ASSERT_SUCCESS(status); 2198 2199 utext_openUChars(&ut1, testStr, -1, &status); 2200 TEST_ASSERT_SUCCESS(status); 2201 uregex_setUText(re, &ut1, &status); 2202 TEST_ASSERT_SUCCESS(status); 2203 2204 /* Find the first match "A" in the original string */ 2205 TEST_ASSERT(uregex_findNext(re, &status)); 2206 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 2207 2208 /* Move the string, kill the original string. */ 2209 u_strcpy(movedStr, testStr); 2210 u_memset(testStr, 0, u_strlen(testStr)); 2211 utext_openUChars(&ut2, movedStr, -1, &status); 2212 TEST_ASSERT_SUCCESS(status); 2213 uregex_refreshUText(re, &ut2, &status); 2214 TEST_ASSERT_SUCCESS(status); 2215 2216 /* Find the following two matches, now working in the moved string. */ 2217 TEST_ASSERT(uregex_findNext(re, &status)); 2218 TEST_ASSERT(uregex_start(re, 0, &status) == 2); 2219 TEST_ASSERT(uregex_findNext(re, &status)); 2220 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 2221 TEST_ASSERT(FALSE == uregex_findNext(re, &status)); 2222 2223 uregex_close(re); 2224} 2225 2226 2227static void TestBug8421(void) { 2228 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched 2229 * was failing. 
2230 */ 2231 URegularExpression *re; 2232 UErrorCode status = U_ZERO_ERROR; 2233 int32_t limit = -1; 2234 2235 re = uregex_openC("abc", 0, 0, &status); 2236 TEST_ASSERT_SUCCESS(status); 2237 2238 limit = uregex_getTimeLimit(re, &status); 2239 TEST_ASSERT_SUCCESS(status); 2240 TEST_ASSERT(limit == 0); 2241 2242 uregex_setTimeLimit(re, 100, &status); 2243 TEST_ASSERT_SUCCESS(status); 2244 limit = uregex_getTimeLimit(re, &status); 2245 TEST_ASSERT_SUCCESS(status); 2246 TEST_ASSERT(limit == 100); 2247 2248 uregex_close(re); 2249} 2250 2251static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) { 2252 return FALSE; 2253} 2254 2255static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) { 2256 return FALSE; 2257} 2258 2259static void TestBug10815() { 2260 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER 2261 * when the callback function specified by uregex_setMatchCallback() returns FALSE 2262 */ 2263 URegularExpression *re; 2264 UErrorCode status = U_ZERO_ERROR; 2265 UChar text[100]; 2266 2267 2268 // findNext() with a find progress callback function. 2269 2270 re = uregex_openC(".z", 0, 0, &status); 2271 TEST_ASSERT_SUCCESS(status); 2272 2273 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text)); 2274 uregex_setText(re, text, -1, &status); 2275 TEST_ASSERT_SUCCESS(status); 2276 2277 uregex_setFindProgressCallback(re, FindCallback, NULL, &status); 2278 TEST_ASSERT_SUCCESS(status); 2279 2280 uregex_findNext(re, &status); 2281 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 2282 2283 uregex_close(re); 2284 2285 // findNext() with a match progress callback function. 2286 2287 status = U_ZERO_ERROR; 2288 re = uregex_openC("((xxx)*)*y", 0, 0, &status); 2289 TEST_ASSERT_SUCCESS(status); 2290 2291 // Pattern + this text gives an exponential time match. Without the callback to stop the match, 2292 // it will appear to be stuck in a (near) infinite loop. 
2293 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text)); 2294 uregex_setText(re, text, -1, &status); 2295 TEST_ASSERT_SUCCESS(status); 2296 2297 uregex_setMatchCallback(re, MatchCallback, NULL, &status); 2298 TEST_ASSERT_SUCCESS(status); 2299 2300 uregex_findNext(re, &status); 2301 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 2302 2303 uregex_close(re); 2304} 2305 2306 2307#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 2308