1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 2004-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8/******************************************************************************** 9* 10* File reapits.c 11* 12*********************************************************************************/ 13/*C API TEST FOR Regular Expressions */ 14/** 15* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't 16* try to test the full functionality. It just calls each function and verifies that it 17* works on a basic level. 18* 19* More complete testing of regular expression functionality is done with the C++ tests. 20**/ 21 22#include "unicode/utypes.h" 23 24#if !UCONFIG_NO_REGULAR_EXPRESSIONS 25 26#include <stdlib.h> 27#include <string.h> 28#include "unicode/uloc.h" 29#include "unicode/uregex.h" 30#include "unicode/ustring.h" 31#include "unicode/utext.h" 32#include "unicode/utf8.h" 33#include "cintltst.h" 34#include "cmemory.h" 35 36#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 37log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} 38 39#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 40log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}} 41 42/* 43 * TEST_SETUP and TEST_TEARDOWN 44 * macros to handle the boilerplate around setting up regex test cases. 45 * parameteres to setup: 46 * pattern: The regex pattern, a (char *) null terminated C string. 47 * testString: The string data, also a (char *) C string. 48 * flags: Regex flags to set when compiling the pattern 49 * 50 * Put arbitrary test code between SETUP and TEARDOWN. 
51 * 're" is the compiled, ready-to-go regular expression. 52 */ 53#define TEST_SETUP(pattern, testString, flags) { \ 54 UChar *srcString = NULL; \ 55 status = U_ZERO_ERROR; \ 56 re = uregex_openC(pattern, flags, NULL, &status); \ 57 TEST_ASSERT_SUCCESS(status); \ 58 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \ 59 u_uastrncpy(srcString, testString, strlen(testString)+1); \ 60 uregex_setText(re, srcString, -1, &status); \ 61 TEST_ASSERT_SUCCESS(status); \ 62 if (U_SUCCESS(status)) { 63 64#define TEST_TEARDOWN \ 65 } \ 66 TEST_ASSERT_SUCCESS(status); \ 67 uregex_close(re); \ 68 free(srcString); \ 69 } 70 71 72/** 73 * @param expected utf-8 array of bytes to be expected 74 */ 75static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 76 char buf_inside_macro[120]; 77 int32_t len = (int32_t)strlen(expected); 78 UBool success; 79 if (nulTerm) { 80 u_austrncpy(buf_inside_macro, (actual), len+1); 81 buf_inside_macro[len+2] = 0; 82 success = (strcmp((expected), buf_inside_macro) == 0); 83 } else { 84 u_austrncpy(buf_inside_macro, (actual), len); 85 buf_inside_macro[len+1] = 0; 86 success = (strncmp((expected), buf_inside_macro, len) == 0); 87 } 88 if (success == FALSE) { 89 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 90 file, line, (expected), buf_inside_macro); 91 } 92} 93 94#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 95 96 97static UBool equals_utf8_utext(const char *utf8, UText *utext) { 98 int32_t u8i = 0; 99 UChar32 u8c = 0; 100 UChar32 utc = 0; 101 UBool stringsEqual = TRUE; 102 utext_setNativeIndex(utext, 0); 103 for (;;) { 104 U8_NEXT_UNSAFE(utf8, u8i, u8c); 105 utc = utext_next32(utext); 106 if (u8c == 0 && utc == U_SENTINEL) { 107 break; 108 } 109 if (u8c != utc || u8c == 0) { 110 stringsEqual = FALSE; 111 break; 112 } 113 } 114 return stringsEqual; 115} 116 117 118static void 
test_assert_utext(const char *expected, UText *actual, const char *file, int line) { 119 utext_setNativeIndex(actual, 0); 120 if (!equals_utf8_utext(expected, actual)) { 121 UChar32 c; 122 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected); 123 c = utext_next32From(actual, 0); 124 while (c != U_SENTINEL) { 125 if (0x20<c && c <0x7e) { 126 log_err("%c", c); 127 } else { 128 log_err("%#x", c); 129 } 130 c = UTEXT_NEXT32(actual); 131 } 132 log_err("\"\n"); 133 } 134} 135 136/* 137 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual) 138 * Note: Expected is a UTF-8 encoded string, _not_ the system code page. 139 */ 140#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__) 141 142static UBool testUTextEqual(UText *uta, UText *utb) { 143 UChar32 ca = 0; 144 UChar32 cb = 0; 145 utext_setNativeIndex(uta, 0); 146 utext_setNativeIndex(utb, 0); 147 do { 148 ca = utext_next32(uta); 149 cb = utext_next32(utb); 150 if (ca != cb) { 151 break; 152 } 153 } while (ca != U_SENTINEL); 154 return ca == cb; 155} 156 157 158 159 160static void TestRegexCAPI(void); 161static void TestBug4315(void); 162static void TestUTextAPI(void); 163static void TestRefreshInput(void); 164static void TestBug8421(void); 165static void TestBug10815(void); 166 167void addURegexTest(TestNode** root); 168 169void addURegexTest(TestNode** root) 170{ 171 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); 172 addTest(root, &TestBug4315, "regex/TestBug4315"); 173 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); 174 addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); 175 addTest(root, &TestBug8421, "regex/TestBug8421"); 176 addTest(root, &TestBug10815, "regex/TestBug10815"); 177} 178 179/* 180 * Call back function and context struct used for testing 181 * regular expression user callbacks. This test is mostly the same as 182 * the corresponding C++ test in intltest. 
183 */ 184typedef struct callBackContext { /* State shared between a test and TestCallbackFn. */ 185 int32_t maxCalls; /* TestCallbackFn returns FALSE once numCalls reaches this value. */ 186 int32_t numCalls; /* Incremented on every TestCallbackFn invocation. */ 187 int32_t lastSteps; /* The steps argument seen on the previous invocation. */ 188} callBackContext; 189 /* Callback under test: context points at a callBackContext. Logs an error unless steps is exactly one more than on the previous call, records the call, and returns TRUE while numCalls is still below maxCalls. NOTE(review): presumably installed as a regex match callback; the registration is not visible in this part of the file. */ 190static UBool U_EXPORT2 U_CALLCONV 191TestCallbackFn(const void *context, int32_t steps) { 192 callBackContext *info = (callBackContext *)context; 193 if (info->lastSteps+1 != steps) { 194 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps); 195 } 196 info->lastSteps = steps; 197 info->numCalls++; 198 return (info->numCalls < info->maxCalls); 199} 200 201/* 202 * Regular Expression C API Tests 203 */ 204static void TestRegexCAPI(void) { 205 UErrorCode status = U_ZERO_ERROR; 206 URegularExpression *re; 207 UChar pat[200]; 208 UChar *minus1; 209 210 memset(&minus1, -1, sizeof(minus1)); 211 212 /* Minimalist open/close */ 213 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); 214 re = uregex_open(pat, -1, 0, 0, &status); 215 if (U_FAILURE(status)) { 216 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 217 return; 218 } 219 uregex_close(re); 220 221 /* Open with all flag values set */ 222 status = U_ZERO_ERROR; 223 re = uregex_open(pat, -1, 224 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL, 225 0, &status); 226 TEST_ASSERT_SUCCESS(status); 227 uregex_close(re); 228 229 /* Open with an invalid flag */ 230 status = U_ZERO_ERROR; 231 re = uregex_open(pat, -1, 0x40000000, 0, &status); 232 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 233 uregex_close(re); 234 235 /* Open with an unimplemented flag */ 236 status = U_ZERO_ERROR; 237 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status); 238 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); 239 uregex_close(re); 240 241 /* openC with an invalid parameter */ 242 status = U_ZERO_ERROR; 243 re = uregex_openC(NULL, 244 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, 
&status); 245 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 246 247 /* openC with an invalid parameter */ 248 status = U_USELESS_COLLATOR_ERROR; 249 re = uregex_openC(NULL, 250 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 251 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); 252 253 /* openC open from a C string */ 254 { 255 const UChar *p; 256 int32_t len; 257 status = U_ZERO_ERROR; 258 re = uregex_openC("abc*", 0, 0, &status); 259 TEST_ASSERT_SUCCESS(status); 260 p = uregex_pattern(re, &len, &status); 261 TEST_ASSERT_SUCCESS(status); 262 263 /* The TEST_ASSERT_SUCCESS above should change too... */ 264 if(U_SUCCESS(status)) { 265 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); 266 TEST_ASSERT(u_strcmp(pat, p) == 0); 267 TEST_ASSERT(len==(int32_t)strlen("abc*")); 268 } 269 270 uregex_close(re); 271 272 /* TODO: Open with ParseError parameter */ 273 } 274 275 /* 276 * clone 277 */ 278 { 279 URegularExpression *clone1; 280 URegularExpression *clone2; 281 URegularExpression *clone3; 282 UChar testString1[30]; 283 UChar testString2[30]; 284 UBool result; 285 286 287 status = U_ZERO_ERROR; 288 re = uregex_openC("abc*", 0, 0, &status); 289 TEST_ASSERT_SUCCESS(status); 290 clone1 = uregex_clone(re, &status); 291 TEST_ASSERT_SUCCESS(status); 292 TEST_ASSERT(clone1 != NULL); 293 294 status = U_ZERO_ERROR; 295 clone2 = uregex_clone(re, &status); 296 TEST_ASSERT_SUCCESS(status); 297 TEST_ASSERT(clone2 != NULL); 298 uregex_close(re); 299 300 status = U_ZERO_ERROR; 301 clone3 = uregex_clone(clone2, &status); 302 TEST_ASSERT_SUCCESS(status); 303 TEST_ASSERT(clone3 != NULL); 304 305 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); 306 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); 307 308 status = U_ZERO_ERROR; 309 uregex_setText(clone1, testString1, -1, &status); 310 TEST_ASSERT_SUCCESS(status); 311 result = uregex_lookingAt(clone1, 0, &status); 312 
TEST_ASSERT_SUCCESS(status); 313 TEST_ASSERT(result==TRUE); 314 315 status = U_ZERO_ERROR; 316 uregex_setText(clone2, testString2, -1, &status); 317 TEST_ASSERT_SUCCESS(status); 318 result = uregex_lookingAt(clone2, 0, &status); 319 TEST_ASSERT_SUCCESS(status); 320 TEST_ASSERT(result==FALSE); 321 result = uregex_find(clone2, 0, &status); 322 TEST_ASSERT_SUCCESS(status); 323 TEST_ASSERT(result==TRUE); 324 325 uregex_close(clone1); 326 uregex_close(clone2); 327 uregex_close(clone3); 328 329 } 330 331 /* 332 * pattern() 333 */ 334 { 335 const UChar *resultPat; 336 int32_t resultLen; 337 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); 338 status = U_ZERO_ERROR; 339 re = uregex_open(pat, -1, 0, NULL, &status); 340 resultPat = uregex_pattern(re, &resultLen, &status); 341 TEST_ASSERT_SUCCESS(status); 342 343 /* The TEST_ASSERT_SUCCESS above should change too... */ 344 if (U_SUCCESS(status)) { 345 TEST_ASSERT(resultLen == -1); 346 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 347 } 348 349 uregex_close(re); 350 351 status = U_ZERO_ERROR; 352 re = uregex_open(pat, 3, 0, NULL, &status); 353 resultPat = uregex_pattern(re, &resultLen, &status); 354 TEST_ASSERT_SUCCESS(status); 355 TEST_ASSERT_SUCCESS(status); 356 357 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 358 if (U_SUCCESS(status)) { 359 TEST_ASSERT(resultLen == 3); 360 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 361 TEST_ASSERT(u_strlen(resultPat) == 3); 362 } 363 364 uregex_close(re); 365 } 366 367 /* 368 * flags() 369 */ 370 { 371 int32_t t; 372 373 status = U_ZERO_ERROR; 374 re = uregex_open(pat, -1, 0, NULL, &status); 375 t = uregex_flags(re, &status); 376 TEST_ASSERT_SUCCESS(status); 377 TEST_ASSERT(t == 0); 378 uregex_close(re); 379 380 status = U_ZERO_ERROR; 381 re = uregex_open(pat, -1, 0, NULL, &status); 382 t = uregex_flags(re, &status); 383 TEST_ASSERT_SUCCESS(status); 384 TEST_ASSERT(t == 0); 385 uregex_close(re); 386 387 status = U_ZERO_ERROR; 388 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); 389 t = uregex_flags(re, &status); 390 TEST_ASSERT_SUCCESS(status); 391 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); 392 uregex_close(re); 393 } 394 395 /* 396 * setText() and lookingAt() 397 */ 398 { 399 UChar text1[50]; 400 UChar text2[50]; 401 UBool result; 402 403 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); 404 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); 405 status = U_ZERO_ERROR; 406 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 407 re = uregex_open(pat, -1, 0, NULL, &status); 408 TEST_ASSERT_SUCCESS(status); 409 410 /* Operation before doing a setText should fail... 
*/ 411 status = U_ZERO_ERROR; 412 uregex_lookingAt(re, 0, &status); 413 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 414 415 status = U_ZERO_ERROR; 416 uregex_setText(re, text1, -1, &status); 417 result = uregex_lookingAt(re, 0, &status); 418 TEST_ASSERT(result == TRUE); 419 TEST_ASSERT_SUCCESS(status); 420 421 status = U_ZERO_ERROR; 422 uregex_setText(re, text2, -1, &status); 423 result = uregex_lookingAt(re, 0, &status); 424 TEST_ASSERT(result == FALSE); 425 TEST_ASSERT_SUCCESS(status); 426 427 status = U_ZERO_ERROR; 428 uregex_setText(re, text1, -1, &status); 429 result = uregex_lookingAt(re, 0, &status); 430 TEST_ASSERT(result == TRUE); 431 TEST_ASSERT_SUCCESS(status); 432 433 status = U_ZERO_ERROR; 434 uregex_setText(re, text1, 5, &status); 435 result = uregex_lookingAt(re, 0, &status); 436 TEST_ASSERT(result == FALSE); 437 TEST_ASSERT_SUCCESS(status); 438 439 status = U_ZERO_ERROR; 440 uregex_setText(re, text1, 6, &status); 441 result = uregex_lookingAt(re, 0, &status); 442 TEST_ASSERT(result == TRUE); 443 TEST_ASSERT_SUCCESS(status); 444 445 uregex_close(re); 446 } 447 448 449 /* 450 * getText() 451 */ 452 { 453 UChar text1[50]; 454 UChar text2[50]; 455 const UChar *result; 456 int32_t textLength; 457 458 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); 459 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); 460 status = U_ZERO_ERROR; 461 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 462 re = uregex_open(pat, -1, 0, NULL, &status); 463 464 uregex_setText(re, text1, -1, &status); 465 result = uregex_getText(re, &textLength, &status); 466 TEST_ASSERT(result == text1); 467 TEST_ASSERT(textLength == -1); 468 TEST_ASSERT_SUCCESS(status); 469 470 status = U_ZERO_ERROR; 471 uregex_setText(re, text2, 7, &status); 472 result = uregex_getText(re, &textLength, &status); 473 TEST_ASSERT(result == text2); 474 TEST_ASSERT(textLength == 7); 475 TEST_ASSERT_SUCCESS(status); 476 477 status = U_ZERO_ERROR; 478 uregex_setText(re, text2, 4, &status); 479 result = 
uregex_getText(re, &textLength, &status); 480 TEST_ASSERT(result == text2); 481 TEST_ASSERT(textLength == 4); 482 TEST_ASSERT_SUCCESS(status); 483 uregex_close(re); 484 } 485 486 /* 487 * matches() 488 */ 489 { 490 UChar text1[50]; 491 UBool result; 492 int len; 493 UChar nullString[] = {0,0,0}; 494 495 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1)); 496 status = U_ZERO_ERROR; 497 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); 498 re = uregex_open(pat, -1, 0, NULL, &status); 499 500 uregex_setText(re, text1, -1, &status); 501 result = uregex_matches(re, 0, &status); 502 TEST_ASSERT(result == FALSE); 503 TEST_ASSERT_SUCCESS(status); 504 505 status = U_ZERO_ERROR; 506 uregex_setText(re, text1, 6, &status); 507 result = uregex_matches(re, 0, &status); 508 TEST_ASSERT(result == TRUE); 509 TEST_ASSERT_SUCCESS(status); 510 511 status = U_ZERO_ERROR; 512 uregex_setText(re, text1, 6, &status); 513 result = uregex_matches(re, 1, &status); 514 TEST_ASSERT(result == FALSE); 515 TEST_ASSERT_SUCCESS(status); 516 uregex_close(re); 517 518 status = U_ZERO_ERROR; 519 re = uregex_openC(".?", 0, NULL, &status); 520 uregex_setText(re, text1, -1, &status); 521 len = u_strlen(text1); 522 result = uregex_matches(re, len, &status); 523 TEST_ASSERT(result == TRUE); 524 TEST_ASSERT_SUCCESS(status); 525 526 status = U_ZERO_ERROR; 527 uregex_setText(re, nullString, -1, &status); 528 TEST_ASSERT_SUCCESS(status); 529 result = uregex_matches(re, 0, &status); 530 TEST_ASSERT(result == TRUE); 531 TEST_ASSERT_SUCCESS(status); 532 uregex_close(re); 533 } 534 535 536 /* 537 * lookingAt() Used in setText test. 
538 */ 539 540 541 /* 542 * find(), findNext, start, end, reset 543 */ 544 { 545 UChar text1[50]; 546 UBool result; 547 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); 548 status = U_ZERO_ERROR; 549 re = uregex_openC("rx", 0, NULL, &status); 550 551 uregex_setText(re, text1, -1, &status); 552 result = uregex_find(re, 0, &status); 553 TEST_ASSERT(result == TRUE); 554 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 555 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 556 TEST_ASSERT_SUCCESS(status); 557 558 result = uregex_find(re, 9, &status); 559 TEST_ASSERT(result == TRUE); 560 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 561 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 562 TEST_ASSERT_SUCCESS(status); 563 564 result = uregex_find(re, 14, &status); 565 TEST_ASSERT(result == FALSE); 566 TEST_ASSERT_SUCCESS(status); 567 568 status = U_ZERO_ERROR; 569 uregex_reset(re, 0, &status); 570 571 result = uregex_findNext(re, &status); 572 TEST_ASSERT(result == TRUE); 573 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 574 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 575 TEST_ASSERT_SUCCESS(status); 576 577 result = uregex_findNext(re, &status); 578 TEST_ASSERT(result == TRUE); 579 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 580 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 581 TEST_ASSERT_SUCCESS(status); 582 583 status = U_ZERO_ERROR; 584 uregex_reset(re, 12, &status); 585 586 result = uregex_findNext(re, &status); 587 TEST_ASSERT(result == TRUE); 588 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 589 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 590 TEST_ASSERT_SUCCESS(status); 591 592 result = uregex_findNext(re, &status); 593 TEST_ASSERT(result == FALSE); 594 TEST_ASSERT_SUCCESS(status); 595 596 uregex_close(re); 597 } 598 599 /* 600 * groupCount 601 */ 602 { 603 int32_t result; 604 605 status = U_ZERO_ERROR; 606 re = uregex_openC("abc", 0, NULL, &status); 607 result = uregex_groupCount(re, &status); 608 TEST_ASSERT_SUCCESS(status); 609 
TEST_ASSERT(result == 0); 610 uregex_close(re); 611 612 status = U_ZERO_ERROR; 613 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); 614 result = uregex_groupCount(re, &status); 615 TEST_ASSERT_SUCCESS(status); 616 TEST_ASSERT(result == 3); 617 uregex_close(re); 618 619 } 620 621 622 /* 623 * group() 624 */ 625 { 626 UChar text1[80]; 627 UChar buf[80]; 628 UBool result; 629 int32_t resultSz; 630 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1)); 631 632 status = U_ZERO_ERROR; 633 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 634 TEST_ASSERT_SUCCESS(status); 635 636 637 uregex_setText(re, text1, -1, &status); 638 result = uregex_find(re, 0, &status); 639 TEST_ASSERT(result==TRUE); 640 641 /* Capture Group 0, the full match. Should succeed. */ 642 status = U_ZERO_ERROR; 643 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status); 644 TEST_ASSERT_SUCCESS(status); 645 TEST_ASSERT_STRING("abc interior def", buf, TRUE); 646 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 647 648 /* Capture group #1. Should succeed. */ 649 status = U_ZERO_ERROR; 650 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status); 651 TEST_ASSERT_SUCCESS(status); 652 TEST_ASSERT_STRING(" interior ", buf, TRUE); 653 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); 654 655 /* Capture group out of range. Error. 
*/ 656 status = U_ZERO_ERROR; 657 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status); 658 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 659 660 /* NULL buffer, pure pre-flight */ 661 status = U_ZERO_ERROR; 662 resultSz = uregex_group(re, 0, NULL, 0, &status); 663 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 664 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 665 666 /* Too small buffer, truncated string */ 667 status = U_ZERO_ERROR; 668 memset(buf, -1, sizeof(buf)); 669 resultSz = uregex_group(re, 0, buf, 5, &status); 670 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 671 TEST_ASSERT_STRING("abc i", buf, FALSE); 672 TEST_ASSERT(buf[5] == (UChar)0xffff); 673 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 674 675 /* Output string just fits buffer, no NUL term. */ 676 status = U_ZERO_ERROR; 677 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); 678 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 679 TEST_ASSERT_STRING("abc interior def", buf, FALSE); 680 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 681 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); 682 683 uregex_close(re); 684 685 } 686 687 /* 688 * Regions 689 */ 690 691 692 /* SetRegion(), getRegion() do something */ 693 TEST_SETUP(".*", "0123456789ABCDEF", 0) 694 UChar resultString[40]; 695 TEST_ASSERT(uregex_regionStart(re, &status) == 0); 696 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); 697 uregex_setRegion(re, 3, 6, &status); 698 TEST_ASSERT(uregex_regionStart(re, &status) == 3); 699 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); 700 TEST_ASSERT(uregex_findNext(re, &status)); 701 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3) 702 TEST_ASSERT_STRING("345", resultString, TRUE); 703 TEST_TEARDOWN; 704 705 /* find(start=-1) uses regions */ 706 TEST_SETUP(".*", "0123456789ABCDEF", 0); 707 uregex_setRegion(re, 4, 6, &status); 708 TEST_ASSERT(uregex_find(re, 
-1, &status) == TRUE); 709 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 710 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 711 TEST_TEARDOWN; 712 713 /* find (start >=0) does not use regions */ 714 TEST_SETUP(".*", "0123456789ABCDEF", 0); 715 uregex_setRegion(re, 4, 6, &status); 716 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 717 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 718 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 719 TEST_TEARDOWN; 720 721 /* findNext() obeys regions */ 722 TEST_SETUP(".", "0123456789ABCDEF", 0); 723 uregex_setRegion(re, 4, 6, &status); 724 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); 725 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 726 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); 727 TEST_ASSERT(uregex_start(re, 0, &status) == 5); 728 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); 729 TEST_TEARDOWN; 730 731 /* matches(start=-1) uses regions */ 732 /* Also, verify that non-greedy *? succeeds in finding the full match. */ 733 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 734 uregex_setRegion(re, 4, 6, &status); 735 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); 736 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 737 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 738 TEST_TEARDOWN; 739 740 /* matches (start >=0) does not use regions */ 741 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 742 uregex_setRegion(re, 4, 6, &status); 743 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); 744 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 745 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 746 TEST_TEARDOWN; 747 748 /* lookingAt(start=-1) uses regions */ 749 /* Also, verify that non-greedy *? finds the first (shortest) match. 
*/ 750 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 751 uregex_setRegion(re, 4, 6, &status); 752 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); 753 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 754 TEST_ASSERT(uregex_end(re, 0, &status) == 4); 755 TEST_TEARDOWN; 756 757 /* lookingAt (start >=0) does not use regions */ 758 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 759 uregex_setRegion(re, 4, 6, &status); 760 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); 761 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 762 TEST_ASSERT(uregex_end(re, 0, &status) == 0); 763 TEST_TEARDOWN; 764 765 /* hitEnd() */ 766 TEST_SETUP("[a-f]*", "abcdefghij", 0); 767 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 768 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); 769 TEST_TEARDOWN; 770 771 TEST_SETUP("[a-f]*", "abcdef", 0); 772 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 773 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); 774 TEST_TEARDOWN; 775 776 /* requireEnd */ 777 TEST_SETUP("abcd", "abcd", 0); 778 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 779 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); 780 TEST_TEARDOWN; 781 782 TEST_SETUP("abcd$", "abcd", 0); 783 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 784 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); 785 TEST_TEARDOWN; 786 787 /* anchoringBounds */ 788 TEST_SETUP("abc$", "abcdef", 0); 789 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); 790 uregex_useAnchoringBounds(re, FALSE, &status); 791 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); 792 793 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); 794 uregex_useAnchoringBounds(re, TRUE, &status); 795 uregex_setRegion(re, 0, 3, &status); 796 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 797 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 798 TEST_TEARDOWN; 799 800 /* Transparent Bounds */ 801 TEST_SETUP("abc(?=def)", "abcdef", 0); 802 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); 803 
uregex_useTransparentBounds(re, TRUE, &status); 804 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); 805 806 uregex_useTransparentBounds(re, FALSE, &status); 807 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ 808 uregex_setRegion(re, 0, 3, &status); 809 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */ 810 uregex_useTransparentBounds(re, TRUE, &status); 811 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */ 812 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 813 TEST_TEARDOWN; 814 815 816 /* 817 * replaceFirst() 818 */ 819 { 820 UChar text1[80]; 821 UChar text2[80]; 822 UChar replText[80]; 823 UChar buf[80]; 824 int32_t resultSz; 825 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 826 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 827 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); 828 829 status = U_ZERO_ERROR; 830 re = uregex_openC("x(.*?)x", 0, NULL, &status); 831 TEST_ASSERT_SUCCESS(status); 832 833 /* Normal case, with match */ 834 uregex_setText(re, text1, -1, &status); 835 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 836 TEST_ASSERT_SUCCESS(status); 837 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); 838 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 839 840 /* No match. Text should copy to output with no changes. */ 841 status = U_ZERO_ERROR; 842 uregex_setText(re, text2, -1, &status); 843 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 844 TEST_ASSERT_SUCCESS(status); 845 TEST_ASSERT_STRING("No match here.", buf, TRUE); 846 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); 847 848 /* Match, output just fills buffer, no termination warning. 
*/ 849 status = U_ZERO_ERROR; 850 uregex_setText(re, text1, -1, &status); 851 memset(buf, -1, sizeof(buf)); 852 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 853 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 854 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 855 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 856 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 857 858 /* Do the replaceFirst again, without first resetting anything. 859 * Should give the same results. 860 */ 861 status = U_ZERO_ERROR; 862 memset(buf, -1, sizeof(buf)); 863 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 864 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 865 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 866 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 867 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 868 869 /* NULL buffer, zero buffer length */ 870 status = U_ZERO_ERROR; 871 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); 872 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 873 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 874 875 /* Buffer too small by one */ 876 status = U_ZERO_ERROR; 877 memset(buf, -1, sizeof(buf)); 878 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); 879 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 880 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); 881 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 882 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 883 884 uregex_close(re); 885 } 886 887 888 /* 889 * replaceAll() 890 */ 891 { 892 UChar text1[80]; /* "Replace xaax x1x x...x." 
*/ 893 UChar text2[80]; /* "No match Here" */ 894 UChar replText[80]; /* "<$1>" */ 895 UChar replText2[80]; /* "<<$1>>" */ 896 const char * pattern = "x(.*?)x"; 897 const char * expectedResult = "Replace <aa> <1> <...>."; 898 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; 899 UChar buf[80]; 900 int32_t resultSize; 901 int32_t expectedResultSize; 902 int32_t expectedResultSize2; 903 int32_t i; 904 905 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 906 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 907 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); 908 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2)); 909 expectedResultSize = strlen(expectedResult); 910 expectedResultSize2 = strlen(expectedResult2); 911 912 status = U_ZERO_ERROR; 913 re = uregex_openC(pattern, 0, NULL, &status); 914 TEST_ASSERT_SUCCESS(status); 915 916 /* Normal case, with match */ 917 uregex_setText(re, text1, -1, &status); 918 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 919 TEST_ASSERT_SUCCESS(status); 920 TEST_ASSERT_STRING(expectedResult, buf, TRUE); 921 TEST_ASSERT(resultSize == expectedResultSize); 922 923 /* No match. Text should copy to output with no changes. */ 924 status = U_ZERO_ERROR; 925 uregex_setText(re, text2, -1, &status); 926 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); 927 TEST_ASSERT_SUCCESS(status); 928 TEST_ASSERT_STRING("No match here.", buf, TRUE); 929 TEST_ASSERT(resultSize == u_strlen(text2)); 930 931 /* Match, output just fills buffer, no termination warning. 
*/ 932 status = U_ZERO_ERROR; 933 uregex_setText(re, text1, -1, &status); 934 memset(buf, -1, sizeof(buf)); 935 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status); 936 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 937 TEST_ASSERT_STRING(expectedResult, buf, FALSE); 938 TEST_ASSERT(resultSize == expectedResultSize); 939 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 940 941 /* Do the replaceFirst again, without first resetting anything. 942 * Should give the same results. 943 */ 944 status = U_ZERO_ERROR; 945 memset(buf, -1, sizeof(buf)); 946 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); 947 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 948 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); 949 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 950 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 951 952 /* NULL buffer, zero buffer length */ 953 status = U_ZERO_ERROR; 954 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); 955 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 956 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 957 958 /* Buffer too small. Try every size, which will tickle edge cases 959 * in uregex_appendReplacement (used by replaceAll) */ 960 for (i=0; i<expectedResultSize; i++) { 961 char expected[80]; 962 status = U_ZERO_ERROR; 963 memset(buf, -1, sizeof(buf)); 964 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); 965 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 966 strcpy(expected, expectedResult); 967 expected[i] = 0; 968 TEST_ASSERT_STRING(expected, buf, FALSE); 969 TEST_ASSERT(resultSize == expectedResultSize); 970 TEST_ASSERT(buf[i] == (UChar)0xffff); 971 } 972 973 /* Buffer too small. 
Same as previous test, except this time the replacement 974 * text is longer than the match capture group, making the length of the complete 975 * replacement longer than the original string. 976 */ 977 for (i=0; i<expectedResultSize2; i++) { 978 char expected[80]; 979 status = U_ZERO_ERROR; 980 memset(buf, -1, sizeof(buf)); 981 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); 982 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 983 strcpy(expected, expectedResult2); 984 expected[i] = 0; 985 TEST_ASSERT_STRING(expected, buf, FALSE); 986 TEST_ASSERT(resultSize == expectedResultSize2); 987 TEST_ASSERT(buf[i] == (UChar)0xffff); 988 } 989 990 991 uregex_close(re); 992 } 993 994 995 /* 996 * appendReplacement() 997 */ 998 { 999 UChar text[100]; 1000 UChar repl[100]; 1001 UChar buf[100]; 1002 UChar *bufPtr; 1003 int32_t bufCap; 1004 1005 1006 status = U_ZERO_ERROR; 1007 re = uregex_openC(".*", 0, 0, &status); 1008 TEST_ASSERT_SUCCESS(status); 1009 1010 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); 1011 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); 1012 uregex_setText(re, text, -1, &status); 1013 1014 /* match covers whole target string */ 1015 uregex_find(re, 0, &status); 1016 TEST_ASSERT_SUCCESS(status); 1017 bufPtr = buf; 1018 bufCap = UPRV_LENGTHOF(buf); 1019 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1020 TEST_ASSERT_SUCCESS(status); 1021 TEST_ASSERT_STRING("some other", buf, TRUE); 1022 1023 /* Match has \u \U escapes */ 1024 uregex_find(re, 0, &status); 1025 TEST_ASSERT_SUCCESS(status); 1026 bufPtr = buf; 1027 bufCap = UPRV_LENGTHOF(buf); 1028 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl)); 1029 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1030 TEST_ASSERT_SUCCESS(status); 1031 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1032 1033 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. 
*/ 1034 status = U_ZERO_ERROR; 1035 uregex_find(re, 0, &status); 1036 TEST_ASSERT_SUCCESS(status); 1037 bufPtr = buf; 1038 status = U_BUFFER_OVERFLOW_ERROR; 1039 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); 1040 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1041 1042 uregex_close(re); 1043 } 1044 1045 1046 /* 1047 * appendTail(). Checked in ReplaceFirst(), replaceAll(). 1048 */ 1049 1050 /* 1051 * split() 1052 */ 1053 { 1054 UChar textToSplit[80]; 1055 UChar text2[80]; 1056 UChar buf[200]; 1057 UChar *fields[10]; 1058 int32_t numFields; 1059 int32_t requiredCapacity; 1060 int32_t spaceNeeded; 1061 int32_t sz; 1062 1063 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit)); 1064 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1065 1066 status = U_ZERO_ERROR; 1067 re = uregex_openC(":", 0, NULL, &status); 1068 1069 1070 /* Simple split */ 1071 1072 uregex_setText(re, textToSplit, -1, &status); 1073 TEST_ASSERT_SUCCESS(status); 1074 1075 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1076 if (U_SUCCESS(status)) { 1077 memset(fields, -1, sizeof(fields)); 1078 numFields = 1079 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status); 1080 TEST_ASSERT_SUCCESS(status); 1081 1082 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1083 if(U_SUCCESS(status)) { 1084 TEST_ASSERT(numFields == 3); 1085 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1086 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1087 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1088 TEST_ASSERT(fields[3] == NULL); 1089 1090 spaceNeeded = u_strlen(textToSplit) - 1091 (numFields - 1) + /* Field delimiters do not appear in output */ 1092 numFields; /* Each field gets a NUL terminator */ 1093 1094 TEST_ASSERT(spaceNeeded == requiredCapacity); 1095 } 1096 } 1097 1098 uregex_close(re); 1099 1100 1101 /* Split with too few output strings available */ 1102 status = U_ZERO_ERROR; 1103 re = uregex_openC(":", 0, NULL, &status); 1104 uregex_setText(re, textToSplit, -1, &status); 1105 TEST_ASSERT_SUCCESS(status); 1106 1107 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1108 if(U_SUCCESS(status)) { 1109 memset(fields, -1, sizeof(fields)); 1110 numFields = 1111 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status); 1112 TEST_ASSERT_SUCCESS(status); 1113 1114 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1115 if(U_SUCCESS(status)) { 1116 TEST_ASSERT(numFields == 2); 1117 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1118 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); 1119 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1120 1121 spaceNeeded = u_strlen(textToSplit) - 1122 (numFields - 1) + /* Field delimiters do not appear in output */ 1123 numFields; /* Each field gets a NUL terminator */ 1124 1125 TEST_ASSERT(spaceNeeded == requiredCapacity); 1126 1127 /* Split with a range of output buffer sizes. 
*/ 1128 spaceNeeded = u_strlen(textToSplit) - 1129 (numFields - 1) + /* Field delimiters do not appear in output */ 1130 numFields; /* Each field gets a NUL terminator */ 1131 1132 for (sz=0; sz < spaceNeeded+1; sz++) { 1133 memset(fields, -1, sizeof(fields)); 1134 status = U_ZERO_ERROR; 1135 numFields = 1136 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); 1137 if (sz >= spaceNeeded) { 1138 TEST_ASSERT_SUCCESS(status); 1139 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1140 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1141 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1142 } else { 1143 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1144 } 1145 TEST_ASSERT(numFields == 3); 1146 TEST_ASSERT(fields[3] == NULL); 1147 TEST_ASSERT(spaceNeeded == requiredCapacity); 1148 } 1149 } 1150 } 1151 1152 uregex_close(re); 1153 } 1154 1155 1156 1157 1158 /* Split(), part 2. Patterns with capture groups. The capture group text 1159 * comes out as additional fields. */ 1160 { 1161 UChar textToSplit[80]; 1162 UChar buf[200]; 1163 UChar *fields[10]; 1164 int32_t numFields; 1165 int32_t requiredCapacity; 1166 int32_t spaceNeeded; 1167 int32_t sz; 1168 1169 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit)); 1170 1171 status = U_ZERO_ERROR; 1172 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1173 1174 uregex_setText(re, textToSplit, -1, &status); 1175 TEST_ASSERT_SUCCESS(status); 1176 1177 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1178 if(U_SUCCESS(status)) { 1179 memset(fields, -1, sizeof(fields)); 1180 numFields = 1181 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status); 1182 TEST_ASSERT_SUCCESS(status); 1183 1184 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1185 if(U_SUCCESS(status)) { 1186 TEST_ASSERT(numFields == 5); 1187 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1188 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1189 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1190 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1191 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1192 TEST_ASSERT(fields[5] == NULL); 1193 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1194 TEST_ASSERT(spaceNeeded == requiredCapacity); 1195 } 1196 } 1197 1198 /* Split with too few output strings available (2) */ 1199 status = U_ZERO_ERROR; 1200 memset(fields, -1, sizeof(fields)); 1201 numFields = 1202 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status); 1203 TEST_ASSERT_SUCCESS(status); 1204 1205 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1206 if(U_SUCCESS(status)) { 1207 TEST_ASSERT(numFields == 2); 1208 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1209 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); 1210 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1211 1212 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ 1213 TEST_ASSERT(spaceNeeded == requiredCapacity); 1214 } 1215 1216 /* Split with too few output strings available (3) */ 1217 status = U_ZERO_ERROR; 1218 memset(fields, -1, sizeof(fields)); 1219 numFields = 1220 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status); 1221 TEST_ASSERT_SUCCESS(status); 1222 1223 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1224 if(U_SUCCESS(status)) { 1225 TEST_ASSERT(numFields == 3); 1226 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1227 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1228 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); 1229 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); 1230 1231 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." 
at NUL positions */ 1232 TEST_ASSERT(spaceNeeded == requiredCapacity); 1233 } 1234 1235 /* Split with just enough output strings available (5) */ 1236 status = U_ZERO_ERROR; 1237 memset(fields, -1, sizeof(fields)); 1238 numFields = 1239 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status); 1240 TEST_ASSERT_SUCCESS(status); 1241 1242 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1243 if(U_SUCCESS(status)) { 1244 TEST_ASSERT(numFields == 5); 1245 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1246 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1247 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1248 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1249 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1250 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); 1251 1252 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1253 TEST_ASSERT(spaceNeeded == requiredCapacity); 1254 } 1255 1256 /* Split, end of text is a field delimiter. */ 1257 status = U_ZERO_ERROR; 1258 sz = strlen("first <tag-a> second<tag-b>"); 1259 uregex_setText(re, textToSplit, sz, &status); 1260 TEST_ASSERT_SUCCESS(status); 1261 1262 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1263 if(U_SUCCESS(status)) { 1264 memset(fields, -1, sizeof(fields)); 1265 numFields = 1266 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status); 1267 TEST_ASSERT_SUCCESS(status); 1268 1269 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1270 if(U_SUCCESS(status)) { 1271 TEST_ASSERT(numFields == 5); 1272 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1273 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1274 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1275 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1276 TEST_ASSERT_STRING("", fields[4], TRUE); 1277 TEST_ASSERT(fields[5] == NULL); 1278 TEST_ASSERT(fields[8] == NULL); 1279 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); 1280 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */ 1281 TEST_ASSERT(spaceNeeded == requiredCapacity); 1282 } 1283 } 1284 1285 uregex_close(re); 1286 } 1287 1288 /* 1289 * set/getTimeLimit 1290 */ 1291 TEST_SETUP("abc$", "abcdef", 0); 1292 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); 1293 uregex_setTimeLimit(re, 1000, &status); 1294 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1295 TEST_ASSERT_SUCCESS(status); 1296 uregex_setTimeLimit(re, -1, &status); 1297 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1298 status = U_ZERO_ERROR; 1299 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1300 TEST_TEARDOWN; 1301 1302 /* 1303 * set/get Stack Limit 1304 */ 1305 TEST_SETUP("abc$", "abcdef", 0); 1306 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); 1307 uregex_setStackLimit(re, 40000, &status); 1308 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1309 TEST_ASSERT_SUCCESS(status); 1310 uregex_setStackLimit(re, -1, &status); 1311 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1312 status = U_ZERO_ERROR; 1313 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1314 TEST_TEARDOWN; 1315 1316 1317 /* 1318 * Get/Set callback functions 1319 * This test is copied from intltest regex/Callbacks 1320 * The pattern and test data will run long enough to cause the callback 1321 * to be invoked. The nested '+' operators give exponential time 1322 * behavior with increasing string length. 
1323 */ 1324 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) 1325 callBackContext cbInfo = {4, 0, 0}; 1326 const void *pContext = &cbInfo; 1327 URegexMatchCallback *returnedFn = &TestCallbackFn; 1328 1329 /* Getting the callback fn when it hasn't been set must return NULL */ 1330 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1331 TEST_ASSERT_SUCCESS(status); 1332 TEST_ASSERT(returnedFn == NULL); 1333 TEST_ASSERT(pContext == NULL); 1334 1335 /* Set thecallback and do a match. */ 1336 /* The callback function should record that it has been called. */ 1337 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); 1338 TEST_ASSERT_SUCCESS(status); 1339 TEST_ASSERT(cbInfo.numCalls == 0); 1340 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); 1341 TEST_ASSERT_SUCCESS(status); 1342 TEST_ASSERT(cbInfo.numCalls > 0); 1343 1344 /* Getting the callback should return the values that were set above. */ 1345 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1346 TEST_ASSERT(returnedFn == &TestCallbackFn); 1347 TEST_ASSERT(pContext == &cbInfo); 1348 1349 TEST_TEARDOWN; 1350} 1351 1352 1353 1354static void TestBug4315(void) { 1355 UErrorCode theICUError = U_ZERO_ERROR; 1356 URegularExpression *theRegEx; 1357 UChar *textBuff; 1358 const char *thePattern; 1359 UChar theString[100]; 1360 UChar *destFields[24]; 1361 int32_t neededLength1; 1362 int32_t neededLength2; 1363 1364 int32_t wordCount = 0; 1365 int32_t destFieldsSize = 24; 1366 1367 thePattern = "ck "; 1368 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle."); 1369 1370 /* open a regex */ 1371 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError); 1372 TEST_ASSERT_SUCCESS(theICUError); 1373 1374 /* set the input string */ 1375 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError); 1376 TEST_ASSERT_SUCCESS(theICUError); 1377 1378 /* split */ 1379 /*explicitly pass NULL and 0 to force the overflow error -> this is where the 1380 * error 
occurs! */ 1381 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields, 1382 destFieldsSize, &theICUError); 1383 1384 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR); 1385 TEST_ASSERT(wordCount==3); 1386 1387 if(theICUError == U_BUFFER_OVERFLOW_ERROR) 1388 { 1389 theICUError = U_ZERO_ERROR; 1390 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1)); 1391 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2, 1392 destFields, destFieldsSize, &theICUError); 1393 TEST_ASSERT(wordCount==3); 1394 TEST_ASSERT_SUCCESS(theICUError); 1395 TEST_ASSERT(neededLength1 == neededLength2); 1396 TEST_ASSERT_STRING("The qui", destFields[0], TRUE); 1397 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE); 1398 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE); 1399 TEST_ASSERT(destFields[3] == NULL); 1400 free(textBuff); 1401 } 1402 uregex_close(theRegEx); 1403} 1404 1405/* Based on TestRegexCAPI() */ 1406static void TestUTextAPI(void) { 1407 UErrorCode status = U_ZERO_ERROR; 1408 URegularExpression *re; 1409 UText patternText = UTEXT_INITIALIZER; 1410 UChar pat[200]; 1411 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; 1412 1413 /* Mimimalist open/close */ 1414 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); 1415 re = uregex_openUText(&patternText, 0, 0, &status); 1416 if (U_FAILURE(status)) { 1417 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 1418 utext_close(&patternText); 1419 return; 1420 } 1421 uregex_close(re); 1422 1423 /* Open with all flag values set */ 1424 status = U_ZERO_ERROR; 1425 re = uregex_openUText(&patternText, 1426 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 1427 0, &status); 1428 TEST_ASSERT_SUCCESS(status); 1429 uregex_close(re); 1430 1431 /* Open with an invalid flag */ 1432 status = U_ZERO_ERROR; 1433 re = 
uregex_openUText(&patternText, 0x40000000, 0, &status); 1434 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 1435 uregex_close(re); 1436 1437 /* open with an invalid parameter */ 1438 status = U_ZERO_ERROR; 1439 re = uregex_openUText(NULL, 1440 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 1441 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 1442 1443 /* 1444 * clone 1445 */ 1446 { 1447 URegularExpression *clone1; 1448 URegularExpression *clone2; 1449 URegularExpression *clone3; 1450 UChar testString1[30]; 1451 UChar testString2[30]; 1452 UBool result; 1453 1454 1455 status = U_ZERO_ERROR; 1456 re = uregex_openUText(&patternText, 0, 0, &status); 1457 TEST_ASSERT_SUCCESS(status); 1458 clone1 = uregex_clone(re, &status); 1459 TEST_ASSERT_SUCCESS(status); 1460 TEST_ASSERT(clone1 != NULL); 1461 1462 status = U_ZERO_ERROR; 1463 clone2 = uregex_clone(re, &status); 1464 TEST_ASSERT_SUCCESS(status); 1465 TEST_ASSERT(clone2 != NULL); 1466 uregex_close(re); 1467 1468 status = U_ZERO_ERROR; 1469 clone3 = uregex_clone(clone2, &status); 1470 TEST_ASSERT_SUCCESS(status); 1471 TEST_ASSERT(clone3 != NULL); 1472 1473 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); 1474 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); 1475 1476 status = U_ZERO_ERROR; 1477 uregex_setText(clone1, testString1, -1, &status); 1478 TEST_ASSERT_SUCCESS(status); 1479 result = uregex_lookingAt(clone1, 0, &status); 1480 TEST_ASSERT_SUCCESS(status); 1481 TEST_ASSERT(result==TRUE); 1482 1483 status = U_ZERO_ERROR; 1484 uregex_setText(clone2, testString2, -1, &status); 1485 TEST_ASSERT_SUCCESS(status); 1486 result = uregex_lookingAt(clone2, 0, &status); 1487 TEST_ASSERT_SUCCESS(status); 1488 TEST_ASSERT(result==FALSE); 1489 result = uregex_find(clone2, 0, &status); 1490 TEST_ASSERT_SUCCESS(status); 1491 TEST_ASSERT(result==TRUE); 1492 1493 uregex_close(clone1); 1494 uregex_close(clone2); 1495 uregex_close(clone3); 1496 1497 
} 1498 1499 /* 1500 * pattern() and patternText() 1501 */ 1502 { 1503 const UChar *resultPat; 1504 int32_t resultLen; 1505 UText *resultText; 1506 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ 1507 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ 1508 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */ 1509 status = U_ZERO_ERROR; 1510 1511 utext_openUTF8(&patternText, str_hello, -1, &status); 1512 re = uregex_open(pat, -1, 0, NULL, &status); 1513 resultPat = uregex_pattern(re, &resultLen, &status); 1514 TEST_ASSERT_SUCCESS(status); 1515 1516 /* The TEST_ASSERT_SUCCESS above should change too... */ 1517 if (U_SUCCESS(status)) { 1518 TEST_ASSERT(resultLen == -1); 1519 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 1520 } 1521 1522 resultText = uregex_patternUText(re, &status); 1523 TEST_ASSERT_SUCCESS(status); 1524 TEST_ASSERT_UTEXT(str_hello, resultText); 1525 1526 uregex_close(re); 1527 1528 status = U_ZERO_ERROR; 1529 re = uregex_open(pat, 3, 0, NULL, &status); 1530 resultPat = uregex_pattern(re, &resultLen, &status); 1531 TEST_ASSERT_SUCCESS(status); 1532 1533 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 1534 if (U_SUCCESS(status)) { 1535 TEST_ASSERT(resultLen == 3); 1536 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 1537 TEST_ASSERT(u_strlen(resultPat) == 3); 1538 } 1539 1540 resultText = uregex_patternUText(re, &status); 1541 TEST_ASSERT_SUCCESS(status); 1542 TEST_ASSERT_UTEXT(str_hel, resultText); 1543 1544 uregex_close(re); 1545 } 1546 1547 /* 1548 * setUText() and lookingAt() 1549 */ 1550 { 1551 UText text1 = UTEXT_INITIALIZER; 1552 UText text2 = UTEXT_INITIALIZER; 1553 UBool result; 1554 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1555 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1556 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1557 status = U_ZERO_ERROR; 1558 utext_openUTF8(&text1, str_abcccd, -1, &status); 1559 utext_openUTF8(&text2, str_abcccxd, -1, &status); 1560 1561 utext_openUTF8(&patternText, str_abcd, -1, &status); 1562 re = uregex_openUText(&patternText, 0, NULL, &status); 1563 TEST_ASSERT_SUCCESS(status); 1564 1565 /* Operation before doing a setText should fail... 
*/ 1566 status = U_ZERO_ERROR; 1567 uregex_lookingAt(re, 0, &status); 1568 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 1569 1570 status = U_ZERO_ERROR; 1571 uregex_setUText(re, &text1, &status); 1572 result = uregex_lookingAt(re, 0, &status); 1573 TEST_ASSERT(result == TRUE); 1574 TEST_ASSERT_SUCCESS(status); 1575 1576 status = U_ZERO_ERROR; 1577 uregex_setUText(re, &text2, &status); 1578 result = uregex_lookingAt(re, 0, &status); 1579 TEST_ASSERT(result == FALSE); 1580 TEST_ASSERT_SUCCESS(status); 1581 1582 status = U_ZERO_ERROR; 1583 uregex_setUText(re, &text1, &status); 1584 result = uregex_lookingAt(re, 0, &status); 1585 TEST_ASSERT(result == TRUE); 1586 TEST_ASSERT_SUCCESS(status); 1587 1588 uregex_close(re); 1589 utext_close(&text1); 1590 utext_close(&text2); 1591 } 1592 1593 1594 /* 1595 * getText() and getUText() 1596 */ 1597 { 1598 UText text1 = UTEXT_INITIALIZER; 1599 UText text2 = UTEXT_INITIALIZER; 1600 UChar text2Chars[20]; 1601 UText *resultText; 1602 const UChar *result; 1603 int32_t textLength; 1604 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1605 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1606 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1607 1608 1609 status = U_ZERO_ERROR; 1610 utext_openUTF8(&text1, str_abcccd, -1, &status); 1611 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars)); 1612 utext_openUChars(&text2, text2Chars, -1, &status); 1613 1614 utext_openUTF8(&patternText, str_abcd, -1, &status); 1615 re = uregex_openUText(&patternText, 0, NULL, &status); 1616 1617 /* First set a UText */ 1618 uregex_setUText(re, &text1, &status); 1619 resultText = uregex_getUText(re, NULL, &status); 1620 TEST_ASSERT_SUCCESS(status); 1621 TEST_ASSERT(resultText != &text1); 1622 utext_setNativeIndex(resultText, 0); 1623 utext_setNativeIndex(&text1, 0); 1624 TEST_ASSERT(testUTextEqual(resultText, &text1)); 1625 
utext_close(resultText); 1626 1627 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */ 1628 (void)result; /* Suppress set but not used warning. */ 1629 TEST_ASSERT(textLength == -1 || textLength == 6); 1630 resultText = uregex_getUText(re, NULL, &status); 1631 TEST_ASSERT_SUCCESS(status); 1632 TEST_ASSERT(resultText != &text1); 1633 utext_setNativeIndex(resultText, 0); 1634 utext_setNativeIndex(&text1, 0); 1635 TEST_ASSERT(testUTextEqual(resultText, &text1)); 1636 utext_close(resultText); 1637 1638 /* Then set a UChar * */ 1639 uregex_setText(re, text2Chars, 7, &status); 1640 resultText = uregex_getUText(re, NULL, &status); 1641 TEST_ASSERT_SUCCESS(status); 1642 utext_setNativeIndex(resultText, 0); 1643 utext_setNativeIndex(&text2, 0); 1644 TEST_ASSERT(testUTextEqual(resultText, &text2)); 1645 utext_close(resultText); 1646 result = uregex_getText(re, &textLength, &status); 1647 TEST_ASSERT(textLength == 7); 1648 1649 uregex_close(re); 1650 utext_close(&text1); 1651 utext_close(&text2); 1652 } 1653 1654 /* 1655 * matches() 1656 */ 1657 { 1658 UText text1 = UTEXT_INITIALIZER; 1659 UBool result; 1660 UText nullText = UTEXT_INITIALIZER; 1661 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */ 1662 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */ 1663 1664 status = U_ZERO_ERROR; 1665 utext_openUTF8(&text1, str_abcccde, -1, &status); 1666 utext_openUTF8(&patternText, str_abcd, -1, &status); 1667 re = uregex_openUText(&patternText, 0, NULL, &status); 1668 1669 uregex_setUText(re, &text1, &status); 1670 result = uregex_matches(re, 0, &status); 1671 TEST_ASSERT(result == FALSE); 1672 TEST_ASSERT_SUCCESS(status); 1673 uregex_close(re); 1674 1675 status = U_ZERO_ERROR; 1676 re = uregex_openC(".?", 0, NULL, &status); 1677 uregex_setUText(re, &text1, &status); 1678 result = uregex_matches(re, 7, &status); 1679 TEST_ASSERT(result == TRUE); 1680 TEST_ASSERT_SUCCESS(status); 1681 
1682 status = U_ZERO_ERROR; 1683 utext_openUTF8(&nullText, "", -1, &status); 1684 uregex_setUText(re, &nullText, &status); 1685 TEST_ASSERT_SUCCESS(status); 1686 result = uregex_matches(re, 0, &status); 1687 TEST_ASSERT(result == TRUE); 1688 TEST_ASSERT_SUCCESS(status); 1689 1690 uregex_close(re); 1691 utext_close(&text1); 1692 utext_close(&nullText); 1693 } 1694 1695 1696 /* 1697 * lookingAt() Used in setText test. 1698 */ 1699 1700 1701 /* 1702 * find(), findNext, start, end, reset 1703 */ 1704 { 1705 UChar text1[50]; 1706 UBool result; 1707 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); 1708 status = U_ZERO_ERROR; 1709 re = uregex_openC("rx", 0, NULL, &status); 1710 1711 uregex_setText(re, text1, -1, &status); 1712 result = uregex_find(re, 0, &status); 1713 TEST_ASSERT(result == TRUE); 1714 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1715 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1716 TEST_ASSERT_SUCCESS(status); 1717 1718 result = uregex_find(re, 9, &status); 1719 TEST_ASSERT(result == TRUE); 1720 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 1721 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 1722 TEST_ASSERT_SUCCESS(status); 1723 1724 result = uregex_find(re, 14, &status); 1725 TEST_ASSERT(result == FALSE); 1726 TEST_ASSERT_SUCCESS(status); 1727 1728 status = U_ZERO_ERROR; 1729 uregex_reset(re, 0, &status); 1730 1731 result = uregex_findNext(re, &status); 1732 TEST_ASSERT(result == TRUE); 1733 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1734 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1735 TEST_ASSERT_SUCCESS(status); 1736 1737 result = uregex_findNext(re, &status); 1738 TEST_ASSERT(result == TRUE); 1739 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 1740 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 1741 TEST_ASSERT_SUCCESS(status); 1742 1743 status = U_ZERO_ERROR; 1744 uregex_reset(re, 12, &status); 1745 1746 result = uregex_findNext(re, &status); 1747 TEST_ASSERT(result == TRUE); 1748 TEST_ASSERT(uregex_start(re, 0, 
&status) == 13); 1749 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 1750 TEST_ASSERT_SUCCESS(status); 1751 1752 result = uregex_findNext(re, &status); 1753 TEST_ASSERT(result == FALSE); 1754 TEST_ASSERT_SUCCESS(status); 1755 1756 uregex_close(re); 1757 } 1758 1759 /* 1760 * groupUText() 1761 */ 1762 { 1763 UChar text1[80]; 1764 UText *actual; 1765 UBool result; 1766 int64_t groupLen = 0; 1767 UChar groupBuf[20]; 1768 1769 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1)); 1770 1771 status = U_ZERO_ERROR; 1772 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 1773 TEST_ASSERT_SUCCESS(status); 1774 1775 uregex_setText(re, text1, -1, &status); 1776 result = uregex_find(re, 0, &status); 1777 TEST_ASSERT(result==TRUE); 1778 1779 /* Capture Group 0 with shallow clone API. Should succeed. */ 1780 status = U_ZERO_ERROR; 1781 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status); 1782 TEST_ASSERT_SUCCESS(status); 1783 1784 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */ 1785 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */ 1786 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status); 1787 1788 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE); 1789 utext_close(actual); 1790 1791 /* Capture group #1. Should succeed. */ 1792 status = U_ZERO_ERROR; 1793 1794 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status); 1795 TEST_ASSERT_SUCCESS(status); 1796 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */ 1797 /* (within the string text1) */ 1798 TEST_ASSERT(10 == groupLen); /* length of " interior " */ 1799 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status); 1800 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE); 1801 1802 utext_close(actual); 1803 1804 /* Capture group out of range. Error. 
*/ 1805 status = U_ZERO_ERROR; 1806 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status); 1807 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1808 utext_close(actual); 1809 1810 uregex_close(re); 1811 } 1812 1813 /* 1814 * replaceFirst() 1815 */ 1816 { 1817 UChar text1[80]; 1818 UChar text2[80]; 1819 UText replText = UTEXT_INITIALIZER; 1820 UText *result; 1821 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ 1822 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1823 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 1824 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */ 1825 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1826 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ 1827 status = U_ZERO_ERROR; 1828 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 1829 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1830 utext_openUTF8(&replText, str_1x, -1, &status); 1831 1832 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1833 TEST_ASSERT_SUCCESS(status); 1834 1835 /* Normal case, with match */ 1836 uregex_setText(re, text1, -1, &status); 1837 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1838 TEST_ASSERT_SUCCESS(status); 1839 TEST_ASSERT_UTEXT(str_Replxxx, result); 1840 utext_close(result); 1841 1842 /* No match. Text should copy to output with no changes. 
*/ 1843 uregex_setText(re, text2, -1, &status); 1844 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1845 TEST_ASSERT_SUCCESS(status); 1846 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1847 utext_close(result); 1848 1849 /* Unicode escapes */ 1850 uregex_setText(re, text1, -1, &status); 1851 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); 1852 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1853 TEST_ASSERT_SUCCESS(status); 1854 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); 1855 utext_close(result); 1856 1857 uregex_close(re); 1858 utext_close(&replText); 1859 } 1860 1861 1862 /* 1863 * replaceAll() 1864 */ 1865 { 1866 UChar text1[80]; 1867 UChar text2[80]; 1868 UText replText = UTEXT_INITIALIZER; 1869 UText *result; 1870 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1871 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ 1872 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1873 status = U_ZERO_ERROR; 1874 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); 1875 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1876 utext_openUTF8(&replText, str_1, -1, &status); 1877 1878 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1879 TEST_ASSERT_SUCCESS(status); 1880 1881 /* Normal case, with match */ 1882 uregex_setText(re, text1, -1, &status); 1883 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1884 TEST_ASSERT_SUCCESS(status); 1885 TEST_ASSERT_UTEXT(str_Replaceaa1, result); 1886 utext_close(result); 1887 1888 /* No match. Text should copy to output with no changes. 
*/ 1889 uregex_setText(re, text2, -1, &status); 1890 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1891 TEST_ASSERT_SUCCESS(status); 1892 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1893 utext_close(result); 1894 1895 uregex_close(re); 1896 utext_close(&replText); 1897 } 1898 1899 1900 /* 1901 * appendReplacement() 1902 */ 1903 { 1904 UChar text[100]; 1905 UChar repl[100]; 1906 UChar buf[100]; 1907 UChar *bufPtr; 1908 int32_t bufCap; 1909 1910 status = U_ZERO_ERROR; 1911 re = uregex_openC(".*", 0, 0, &status); 1912 TEST_ASSERT_SUCCESS(status); 1913 1914 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); 1915 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); 1916 uregex_setText(re, text, -1, &status); 1917 1918 /* match covers whole target string */ 1919 uregex_find(re, 0, &status); 1920 TEST_ASSERT_SUCCESS(status); 1921 bufPtr = buf; 1922 bufCap = UPRV_LENGTHOF(buf); 1923 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1924 TEST_ASSERT_SUCCESS(status); 1925 TEST_ASSERT_STRING("some other", buf, TRUE); 1926 1927 /* Match has \u \U escapes */ 1928 uregex_find(re, 0, &status); 1929 TEST_ASSERT_SUCCESS(status); 1930 bufPtr = buf; 1931 bufCap = UPRV_LENGTHOF(buf); 1932 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl)); 1933 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1934 TEST_ASSERT_SUCCESS(status); 1935 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1936 1937 uregex_close(re); 1938 } 1939 1940 1941 /* 1942 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(). 
1943 */ 1944 1945 /* 1946 * splitUText() 1947 */ 1948 { 1949 UChar textToSplit[80]; 1950 UChar text2[80]; 1951 UText *fields[10]; 1952 int32_t numFields; 1953 int32_t i; 1954 1955 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit)); 1956 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); 1957 1958 status = U_ZERO_ERROR; 1959 re = uregex_openC(":", 0, NULL, &status); 1960 1961 1962 /* Simple split */ 1963 1964 uregex_setText(re, textToSplit, -1, &status); 1965 TEST_ASSERT_SUCCESS(status); 1966 1967 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1968 if (U_SUCCESS(status)) { 1969 memset(fields, 0, sizeof(fields)); 1970 numFields = uregex_splitUText(re, fields, 10, &status); 1971 TEST_ASSERT_SUCCESS(status); 1972 1973 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1974 if(U_SUCCESS(status)) { 1975 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */ 1976 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */ 1977 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */ 1978 TEST_ASSERT(numFields == 3); 1979 TEST_ASSERT_UTEXT(str_first, fields[0]); 1980 TEST_ASSERT_UTEXT(str_second, fields[1]); 1981 TEST_ASSERT_UTEXT(str_third, fields[2]); 1982 TEST_ASSERT(fields[3] == NULL); 1983 } 1984 for(i = 0; i < numFields; i++) { 1985 utext_close(fields[i]); 1986 } 1987 } 1988 1989 uregex_close(re); 1990 1991 1992 /* Split with too few output strings available */ 1993 status = U_ZERO_ERROR; 1994 re = uregex_openC(":", 0, NULL, &status); 1995 uregex_setText(re, textToSplit, -1, &status); 1996 TEST_ASSERT_SUCCESS(status); 1997 1998 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1999 if(U_SUCCESS(status)) { 2000 fields[0] = NULL; 2001 fields[1] = NULL; 2002 fields[2] = &patternText; 2003 numFields = uregex_splitUText(re, fields, 2, &status); 2004 TEST_ASSERT_SUCCESS(status); 2005 2006 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2007 if(U_SUCCESS(status)) { 2008 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2009 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */ 2010 TEST_ASSERT(numFields == 2); 2011 TEST_ASSERT_UTEXT(str_first, fields[0]); 2012 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); 2013 TEST_ASSERT(fields[2] == &patternText); 2014 } 2015 for(i = 0; i < numFields; i++) { 2016 utext_close(fields[i]); 2017 } 2018 } 2019 2020 uregex_close(re); 2021 } 2022 2023 /* splitUText(), part 2. Patterns with capture groups. The capture group text 2024 * comes out as additional fields. */ 2025 { 2026 UChar textToSplit[80]; 2027 UText *fields[10]; 2028 int32_t numFields; 2029 int32_t i; 2030 2031 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit)); 2032 2033 status = U_ZERO_ERROR; 2034 re = uregex_openC("<(.*?)>", 0, NULL, &status); 2035 2036 uregex_setText(re, textToSplit, -1, &status); 2037 TEST_ASSERT_SUCCESS(status); 2038 2039 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2040 if(U_SUCCESS(status)) { 2041 memset(fields, 0, sizeof(fields)); 2042 numFields = uregex_splitUText(re, fields, 10, &status); 2043 TEST_ASSERT_SUCCESS(status); 2044 2045 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2046 if(U_SUCCESS(status)) { 2047 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2048 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2049 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2050 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2051 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2052 2053 TEST_ASSERT(numFields == 5); 2054 TEST_ASSERT_UTEXT(str_first, fields[0]); 2055 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2056 TEST_ASSERT_UTEXT(str_second, fields[2]); 2057 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2058 TEST_ASSERT_UTEXT(str_third, fields[4]); 2059 TEST_ASSERT(fields[5] == NULL); 2060 } 2061 for(i = 0; i < numFields; i++) { 2062 utext_close(fields[i]); 2063 } 2064 } 2065 2066 /* Split with too few output strings available (2) */ 2067 status = U_ZERO_ERROR; 2068 fields[0] = NULL; 2069 fields[1] = NULL; 2070 fields[2] = &patternText; 2071 numFields = uregex_splitUText(re, fields, 2, &status); 2072 TEST_ASSERT_SUCCESS(status); 2073 2074 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2075 if(U_SUCCESS(status)) { 2076 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2077 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2078 TEST_ASSERT(numFields == 2); 2079 TEST_ASSERT_UTEXT(str_first, fields[0]); 2080 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); 2081 TEST_ASSERT(fields[2] == &patternText); 2082 } 2083 for(i = 0; i < numFields; i++) { 2084 utext_close(fields[i]); 2085 } 2086 2087 2088 /* Split with too few output strings available (3) */ 2089 status = U_ZERO_ERROR; 2090 fields[0] = NULL; 2091 fields[1] = NULL; 2092 fields[2] = NULL; 2093 fields[3] = &patternText; 2094 numFields = uregex_splitUText(re, fields, 3, &status); 2095 TEST_ASSERT_SUCCESS(status); 2096 2097 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2098 if(U_SUCCESS(status)) { 2099 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2100 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2101 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2102 TEST_ASSERT(numFields == 3); 2103 TEST_ASSERT_UTEXT(str_first, fields[0]); 2104 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2105 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); 2106 TEST_ASSERT(fields[3] == &patternText); 2107 } 2108 for(i = 0; i < numFields; i++) { 2109 utext_close(fields[i]); 2110 } 2111 2112 /* Split with just enough output strings available (5) */ 2113 status = U_ZERO_ERROR; 2114 fields[0] = NULL; 2115 fields[1] = NULL; 2116 fields[2] = NULL; 2117 fields[3] = NULL; 2118 fields[4] = NULL; 2119 fields[5] = &patternText; 2120 numFields = uregex_splitUText(re, fields, 5, &status); 2121 TEST_ASSERT_SUCCESS(status); 2122 2123 
/* The TEST_ASSERT_SUCCESS call above should change too... */ 2124 if(U_SUCCESS(status)) { 2125 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2126 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2127 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2128 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2129 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2130 2131 TEST_ASSERT(numFields == 5); 2132 TEST_ASSERT_UTEXT(str_first, fields[0]); 2133 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2134 TEST_ASSERT_UTEXT(str_second, fields[2]); 2135 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2136 TEST_ASSERT_UTEXT(str_third, fields[4]); 2137 TEST_ASSERT(fields[5] == &patternText); 2138 } 2139 for(i = 0; i < numFields; i++) { 2140 utext_close(fields[i]); 2141 } 2142 2143 /* Split, end of text is a field delimiter. */ 2144 status = U_ZERO_ERROR; 2145 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status); 2146 TEST_ASSERT_SUCCESS(status); 2147 2148 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2149 if(U_SUCCESS(status)) { 2150 memset(fields, 0, sizeof(fields)); 2151 fields[9] = &patternText; 2152 numFields = uregex_splitUText(re, fields, 9, &status); 2153 TEST_ASSERT_SUCCESS(status); 2154 2155 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2156 if(U_SUCCESS(status)) { 2157 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2158 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2159 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2160 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2161 const char str_empty[] = { 0x00 }; 2162 2163 TEST_ASSERT(numFields == 5); 2164 TEST_ASSERT_UTEXT(str_first, fields[0]); 2165 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2166 TEST_ASSERT_UTEXT(str_second, fields[2]); 2167 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2168 TEST_ASSERT_UTEXT(str_empty, fields[4]); 2169 TEST_ASSERT(fields[5] == NULL); 2170 TEST_ASSERT(fields[8] == NULL); 2171 TEST_ASSERT(fields[9] == &patternText); 2172 } 2173 for(i = 0; i < numFields; i++) { 2174 utext_close(fields[i]); 2175 } 2176 } 2177 2178 uregex_close(re); 2179 } 2180 utext_close(&patternText); 2181} 2182 2183 2184static void TestRefreshInput(void) { 2185 /* 2186 * RefreshInput changes out the input of a URegularExpression without 2187 * changing anything else in the match state. Used with Java JNI, 2188 * when Java moves the underlying string storage. This test 2189 * runs a find() loop, moving the text after the first match. 2190 * The right number of matches should still be found. 
2191 */ 2192 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */ 2193 UChar movedStr[] = { 0, 0, 0, 0, 0, 0}; 2194 UErrorCode status = U_ZERO_ERROR; 2195 URegularExpression *re; 2196 UText ut1 = UTEXT_INITIALIZER; 2197 UText ut2 = UTEXT_INITIALIZER; 2198 2199 re = uregex_openC("[ABC]", 0, 0, &status); 2200 TEST_ASSERT_SUCCESS(status); 2201 2202 utext_openUChars(&ut1, testStr, -1, &status); 2203 TEST_ASSERT_SUCCESS(status); 2204 uregex_setUText(re, &ut1, &status); 2205 TEST_ASSERT_SUCCESS(status); 2206 2207 /* Find the first match "A" in the original string */ 2208 TEST_ASSERT(uregex_findNext(re, &status)); 2209 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 2210 2211 /* Move the string, kill the original string. */ 2212 u_strcpy(movedStr, testStr); 2213 u_memset(testStr, 0, u_strlen(testStr)); 2214 utext_openUChars(&ut2, movedStr, -1, &status); 2215 TEST_ASSERT_SUCCESS(status); 2216 uregex_refreshUText(re, &ut2, &status); 2217 TEST_ASSERT_SUCCESS(status); 2218 2219 /* Find the following two matches, now working in the moved string. */ 2220 TEST_ASSERT(uregex_findNext(re, &status)); 2221 TEST_ASSERT(uregex_start(re, 0, &status) == 2); 2222 TEST_ASSERT(uregex_findNext(re, &status)); 2223 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 2224 TEST_ASSERT(FALSE == uregex_findNext(re, &status)); 2225 2226 uregex_close(re); 2227} 2228 2229 2230static void TestBug8421(void) { 2231 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched 2232 * was failing. 
2233 */ 2234 URegularExpression *re; 2235 UErrorCode status = U_ZERO_ERROR; 2236 int32_t limit = -1; 2237 2238 re = uregex_openC("abc", 0, 0, &status); 2239 TEST_ASSERT_SUCCESS(status); 2240 2241 limit = uregex_getTimeLimit(re, &status); 2242 TEST_ASSERT_SUCCESS(status); 2243 TEST_ASSERT(limit == 0); 2244 2245 uregex_setTimeLimit(re, 100, &status); 2246 TEST_ASSERT_SUCCESS(status); 2247 limit = uregex_getTimeLimit(re, &status); 2248 TEST_ASSERT_SUCCESS(status); 2249 TEST_ASSERT(limit == 100); 2250 2251 uregex_close(re); 2252} 2253 2254static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) { 2255 return FALSE; 2256} 2257 2258static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) { 2259 return FALSE; 2260} 2261 2262static void TestBug10815() { 2263 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER 2264 * when the callback function specified by uregex_setMatchCallback() returns FALSE 2265 */ 2266 URegularExpression *re; 2267 UErrorCode status = U_ZERO_ERROR; 2268 UChar text[100]; 2269 2270 2271 // findNext() with a find progress callback function. 2272 2273 re = uregex_openC(".z", 0, 0, &status); 2274 TEST_ASSERT_SUCCESS(status); 2275 2276 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text)); 2277 uregex_setText(re, text, -1, &status); 2278 TEST_ASSERT_SUCCESS(status); 2279 2280 uregex_setFindProgressCallback(re, FindCallback, NULL, &status); 2281 TEST_ASSERT_SUCCESS(status); 2282 2283 uregex_findNext(re, &status); 2284 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 2285 2286 uregex_close(re); 2287 2288 // findNext() with a match progress callback function. 2289 2290 status = U_ZERO_ERROR; 2291 re = uregex_openC("((xxx)*)*y", 0, 0, &status); 2292 TEST_ASSERT_SUCCESS(status); 2293 2294 // Pattern + this text gives an exponential time match. Without the callback to stop the match, 2295 // it will appear to be stuck in a (near) infinite loop. 
2296 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text)); 2297 uregex_setText(re, text, -1, &status); 2298 TEST_ASSERT_SUCCESS(status); 2299 2300 uregex_setMatchCallback(re, MatchCallback, NULL, &status); 2301 TEST_ASSERT_SUCCESS(status); 2302 2303 uregex_findNext(re, &status); 2304 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 2305 2306 uregex_close(re); 2307} 2308 2309 2310#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 2311