1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2004-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/******************************************************************************** 7* 8* File reapits.c 9* 10*********************************************************************************/ 11/*C API TEST FOR Regular Expressions */ 12/** 13* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't 14* try to test the full functionality. It just calls each function and verifies that it 15* works on a basic level. 16* 17* More complete testing of regular expression functionality is done with the C++ tests. 18**/ 19 20#include "unicode/utypes.h" 21 22#if !UCONFIG_NO_REGULAR_EXPRESSIONS 23 24#include <stdlib.h> 25#include <string.h> 26#include "unicode/uloc.h" 27#include "unicode/uregex.h" 28#include "unicode/ustring.h" 29#include "unicode/utext.h" 30#include "cintltst.h" 31 32#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 33log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} 34 35#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 36log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} 37 38/* 39 * TEST_SETUP and TEST_TEARDOWN 40 * macros to handle the boilerplate around setting up regex test cases. 41 * parameteres to setup: 42 * pattern: The regex pattern, a (char *) null terminated C string. 43 * testString: The string data, also a (char *) C string. 44 * flags: Regex flags to set when compiling the pattern 45 * 46 * Put arbitrary test code between SETUP and TEARDOWN. 47 * 're" is the compiled, ready-to-go regular expression. 
48 */ 49#define TEST_SETUP(pattern, testString, flags) { \ 50 UChar *srcString = NULL; \ 51 status = U_ZERO_ERROR; \ 52 re = uregex_openC(pattern, flags, NULL, &status); \ 53 TEST_ASSERT_SUCCESS(status); \ 54 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \ 55 u_uastrncpy(srcString, testString, strlen(testString)+1); \ 56 uregex_setText(re, srcString, -1, &status); \ 57 TEST_ASSERT_SUCCESS(status); \ 58 if (U_SUCCESS(status)) { 59 60#define TEST_TEARDOWN \ 61 } \ 62 TEST_ASSERT_SUCCESS(status); \ 63 uregex_close(re); \ 64 free(srcString); \ 65 } 66 67 68/** 69 * @param expected utf-8 array of bytes to be expected 70 */ 71static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) { 72 char buf_inside_macro[120]; 73 int32_t len = (int32_t)strlen(expected); 74 UBool success; 75 if (nulTerm) { 76 u_austrncpy(buf_inside_macro, (actual), len+1); 77 buf_inside_macro[len+2] = 0; 78 success = (strcmp((expected), buf_inside_macro) == 0); 79 } else { 80 u_austrncpy(buf_inside_macro, (actual), len); 81 buf_inside_macro[len+1] = 0; 82 success = (strncmp((expected), buf_inside_macro, len) == 0); 83 } 84 if (success == FALSE) { 85 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", 86 file, line, (expected), buf_inside_macro); 87 } 88} 89 90#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) 91 92 93static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) { 94 UErrorCode status = U_ZERO_ERROR; 95 UText expectedText = UTEXT_INITIALIZER; 96 utext_openUTF8(&expectedText, expected, -1, &status); 97 utext_setNativeIndex(actual, 0); 98 if (utext_compare(&expectedText, -1, actual, -1) != 0) { 99 UChar32 c; 100 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected); 101 c = utext_next32From(actual, 0); 102 while (c != U_SENTINEL) { 103 if (0x20<c && c <0x7e) 
{ 104 log_err("%c", c); 105 } else { 106 log_err("%#x", c); 107 } 108 c = UTEXT_NEXT32(actual); 109 } 110 log_err("\"\n"); 111 } 112 utext_close(&expectedText); 113} 114 115#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__) 116 117 118 119static void TestRegexCAPI(void); 120static void TestBug4315(void); 121static void TestUTextAPI(void); 122static void TestRefreshInput(void); 123static void TestBug8421(void); 124 125void addURegexTest(TestNode** root); 126 127void addURegexTest(TestNode** root) 128{ 129 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); 130 addTest(root, &TestBug4315, "regex/TestBug4315"); 131 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); 132 addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); 133 addTest(root, &TestBug8421, "regex/TestBug8421"); 134} 135 136/* 137 * Call back function and context struct used for testing 138 * regular expression user callbacks. This test is mostly the same as 139 * the corresponding C++ test in intltest. 140 */ 141typedef struct callBackContext { 142 int32_t maxCalls; 143 int32_t numCalls; 144 int32_t lastSteps; 145} callBackContext; 146 147static UBool U_EXPORT2 U_CALLCONV 148TestCallbackFn(const void *context, int32_t steps) { 149 callBackContext *info = (callBackContext *)context; 150 if (info->lastSteps+1 != steps) { 151 log_err("incorrect steps in callback. 
Expected %d, got %d\n", info->lastSteps+1, steps); 152 } 153 info->lastSteps = steps; 154 info->numCalls++; 155 return (info->numCalls < info->maxCalls); 156} 157 158/* 159 * Regular Expression C API Tests 160 */ 161static void TestRegexCAPI(void) { 162 UErrorCode status = U_ZERO_ERROR; 163 URegularExpression *re; 164 UChar pat[200]; 165 UChar *minus1; 166 167 memset(&minus1, -1, sizeof(minus1)); 168 169 /* Mimimalist open/close */ 170 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 171 re = uregex_open(pat, -1, 0, 0, &status); 172 if (U_FAILURE(status)) { 173 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 174 return; 175 } 176 uregex_close(re); 177 178 /* Open with all flag values set */ 179 status = U_ZERO_ERROR; 180 re = uregex_open(pat, -1, 181 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 182 0, &status); 183 TEST_ASSERT_SUCCESS(status); 184 uregex_close(re); 185 186 /* Open with an invalid flag */ 187 status = U_ZERO_ERROR; 188 re = uregex_open(pat, -1, 0x40000000, 0, &status); 189 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 190 uregex_close(re); 191 192 /* Open with an unimplemented flag */ 193 status = U_ZERO_ERROR; 194 re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status); 195 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); 196 uregex_close(re); 197 198 /* openC with an invalid parameter */ 199 status = U_ZERO_ERROR; 200 re = uregex_openC(NULL, 201 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 202 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 203 204 /* openC with an invalid parameter */ 205 status = U_USELESS_COLLATOR_ERROR; 206 re = uregex_openC(NULL, 207 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 208 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); 209 210 /* openC open 
from a C string */ 211 { 212 const UChar *p; 213 int32_t len; 214 status = U_ZERO_ERROR; 215 re = uregex_openC("abc*", 0, 0, &status); 216 TEST_ASSERT_SUCCESS(status); 217 p = uregex_pattern(re, &len, &status); 218 TEST_ASSERT_SUCCESS(status); 219 220 /* The TEST_ASSERT_SUCCESS above should change too... */ 221 if(U_SUCCESS(status)) { 222 u_uastrncpy(pat, "abc*", sizeof(pat)/2); 223 TEST_ASSERT(u_strcmp(pat, p) == 0); 224 TEST_ASSERT(len==(int32_t)strlen("abc*")); 225 } 226 227 uregex_close(re); 228 229 /* TODO: Open with ParseError parameter */ 230 } 231 232 /* 233 * clone 234 */ 235 { 236 URegularExpression *clone1; 237 URegularExpression *clone2; 238 URegularExpression *clone3; 239 UChar testString1[30]; 240 UChar testString2[30]; 241 UBool result; 242 243 244 status = U_ZERO_ERROR; 245 re = uregex_openC("abc*", 0, 0, &status); 246 TEST_ASSERT_SUCCESS(status); 247 clone1 = uregex_clone(re, &status); 248 TEST_ASSERT_SUCCESS(status); 249 TEST_ASSERT(clone1 != NULL); 250 251 status = U_ZERO_ERROR; 252 clone2 = uregex_clone(re, &status); 253 TEST_ASSERT_SUCCESS(status); 254 TEST_ASSERT(clone2 != NULL); 255 uregex_close(re); 256 257 status = U_ZERO_ERROR; 258 clone3 = uregex_clone(clone2, &status); 259 TEST_ASSERT_SUCCESS(status); 260 TEST_ASSERT(clone3 != NULL); 261 262 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 263 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 264 265 status = U_ZERO_ERROR; 266 uregex_setText(clone1, testString1, -1, &status); 267 TEST_ASSERT_SUCCESS(status); 268 result = uregex_lookingAt(clone1, 0, &status); 269 TEST_ASSERT_SUCCESS(status); 270 TEST_ASSERT(result==TRUE); 271 272 status = U_ZERO_ERROR; 273 uregex_setText(clone2, testString2, -1, &status); 274 TEST_ASSERT_SUCCESS(status); 275 result = uregex_lookingAt(clone2, 0, &status); 276 TEST_ASSERT_SUCCESS(status); 277 TEST_ASSERT(result==FALSE); 278 result = uregex_find(clone2, 0, &status); 279 TEST_ASSERT_SUCCESS(status); 280 TEST_ASSERT(result==TRUE); 281 282 
uregex_close(clone1); 283 uregex_close(clone2); 284 uregex_close(clone3); 285 286 } 287 288 /* 289 * pattern() 290 */ 291 { 292 const UChar *resultPat; 293 int32_t resultLen; 294 u_uastrncpy(pat, "hello", sizeof(pat)/2); 295 status = U_ZERO_ERROR; 296 re = uregex_open(pat, -1, 0, NULL, &status); 297 resultPat = uregex_pattern(re, &resultLen, &status); 298 TEST_ASSERT_SUCCESS(status); 299 300 /* The TEST_ASSERT_SUCCESS above should change too... */ 301 if (U_SUCCESS(status)) { 302 TEST_ASSERT(resultLen == -1); 303 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 304 } 305 306 uregex_close(re); 307 308 status = U_ZERO_ERROR; 309 re = uregex_open(pat, 3, 0, NULL, &status); 310 resultPat = uregex_pattern(re, &resultLen, &status); 311 TEST_ASSERT_SUCCESS(status); 312 TEST_ASSERT_SUCCESS(status); 313 314 /* The TEST_ASSERT_SUCCESS above should change too... */ 315 if (U_SUCCESS(status)) { 316 TEST_ASSERT(resultLen == 3); 317 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 318 TEST_ASSERT(u_strlen(resultPat) == 3); 319 } 320 321 uregex_close(re); 322 } 323 324 /* 325 * flags() 326 */ 327 { 328 int32_t t; 329 330 status = U_ZERO_ERROR; 331 re = uregex_open(pat, -1, 0, NULL, &status); 332 t = uregex_flags(re, &status); 333 TEST_ASSERT_SUCCESS(status); 334 TEST_ASSERT(t == 0); 335 uregex_close(re); 336 337 status = U_ZERO_ERROR; 338 re = uregex_open(pat, -1, 0, NULL, &status); 339 t = uregex_flags(re, &status); 340 TEST_ASSERT_SUCCESS(status); 341 TEST_ASSERT(t == 0); 342 uregex_close(re); 343 344 status = U_ZERO_ERROR; 345 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); 346 t = uregex_flags(re, &status); 347 TEST_ASSERT_SUCCESS(status); 348 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); 349 uregex_close(re); 350 } 351 352 /* 353 * setText() and lookingAt() 354 */ 355 { 356 UChar text1[50]; 357 UChar text2[50]; 358 UBool result; 359 360 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 361 u_uastrncpy(text2, "abcccxd", 
sizeof(text2)/2); 362 status = U_ZERO_ERROR; 363 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 364 re = uregex_open(pat, -1, 0, NULL, &status); 365 TEST_ASSERT_SUCCESS(status); 366 367 /* Operation before doing a setText should fail... */ 368 status = U_ZERO_ERROR; 369 uregex_lookingAt(re, 0, &status); 370 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 371 372 status = U_ZERO_ERROR; 373 uregex_setText(re, text1, -1, &status); 374 result = uregex_lookingAt(re, 0, &status); 375 TEST_ASSERT(result == TRUE); 376 TEST_ASSERT_SUCCESS(status); 377 378 status = U_ZERO_ERROR; 379 uregex_setText(re, text2, -1, &status); 380 result = uregex_lookingAt(re, 0, &status); 381 TEST_ASSERT(result == FALSE); 382 TEST_ASSERT_SUCCESS(status); 383 384 status = U_ZERO_ERROR; 385 uregex_setText(re, text1, -1, &status); 386 result = uregex_lookingAt(re, 0, &status); 387 TEST_ASSERT(result == TRUE); 388 TEST_ASSERT_SUCCESS(status); 389 390 status = U_ZERO_ERROR; 391 uregex_setText(re, text1, 5, &status); 392 result = uregex_lookingAt(re, 0, &status); 393 TEST_ASSERT(result == FALSE); 394 TEST_ASSERT_SUCCESS(status); 395 396 status = U_ZERO_ERROR; 397 uregex_setText(re, text1, 6, &status); 398 result = uregex_lookingAt(re, 0, &status); 399 TEST_ASSERT(result == TRUE); 400 TEST_ASSERT_SUCCESS(status); 401 402 uregex_close(re); 403 } 404 405 406 /* 407 * getText() 408 */ 409 { 410 UChar text1[50]; 411 UChar text2[50]; 412 const UChar *result; 413 int32_t textLength; 414 415 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); 416 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); 417 status = U_ZERO_ERROR; 418 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 419 re = uregex_open(pat, -1, 0, NULL, &status); 420 421 uregex_setText(re, text1, -1, &status); 422 result = uregex_getText(re, &textLength, &status); 423 TEST_ASSERT(result == text1); 424 TEST_ASSERT(textLength == -1); 425 TEST_ASSERT_SUCCESS(status); 426 427 status = U_ZERO_ERROR; 428 uregex_setText(re, text2, 7, &status); 429 result = uregex_getText(re, 
&textLength, &status); 430 TEST_ASSERT(result == text2); 431 TEST_ASSERT(textLength == 7); 432 TEST_ASSERT_SUCCESS(status); 433 434 status = U_ZERO_ERROR; 435 uregex_setText(re, text2, 4, &status); 436 result = uregex_getText(re, &textLength, &status); 437 TEST_ASSERT(result == text2); 438 TEST_ASSERT(textLength == 4); 439 TEST_ASSERT_SUCCESS(status); 440 uregex_close(re); 441 } 442 443 /* 444 * matches() 445 */ 446 { 447 UChar text1[50]; 448 UBool result; 449 int len; 450 UChar nullString[] = {0,0,0}; 451 452 u_uastrncpy(text1, "abcccde", sizeof(text1)/2); 453 status = U_ZERO_ERROR; 454 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); 455 re = uregex_open(pat, -1, 0, NULL, &status); 456 457 uregex_setText(re, text1, -1, &status); 458 result = uregex_matches(re, 0, &status); 459 TEST_ASSERT(result == FALSE); 460 TEST_ASSERT_SUCCESS(status); 461 462 status = U_ZERO_ERROR; 463 uregex_setText(re, text1, 6, &status); 464 result = uregex_matches(re, 0, &status); 465 TEST_ASSERT(result == TRUE); 466 TEST_ASSERT_SUCCESS(status); 467 468 status = U_ZERO_ERROR; 469 uregex_setText(re, text1, 6, &status); 470 result = uregex_matches(re, 1, &status); 471 TEST_ASSERT(result == FALSE); 472 TEST_ASSERT_SUCCESS(status); 473 uregex_close(re); 474 475 status = U_ZERO_ERROR; 476 re = uregex_openC(".?", 0, NULL, &status); 477 uregex_setText(re, text1, -1, &status); 478 len = u_strlen(text1); 479 result = uregex_matches(re, len, &status); 480 TEST_ASSERT(result == TRUE); 481 TEST_ASSERT_SUCCESS(status); 482 483 status = U_ZERO_ERROR; 484 uregex_setText(re, nullString, -1, &status); 485 TEST_ASSERT_SUCCESS(status); 486 result = uregex_matches(re, 0, &status); 487 TEST_ASSERT(result == TRUE); 488 TEST_ASSERT_SUCCESS(status); 489 uregex_close(re); 490 } 491 492 493 /* 494 * lookingAt() Used in setText test. 
495 */ 496 497 498 /* 499 * find(), findNext, start, end, reset 500 */ 501 { 502 UChar text1[50]; 503 UBool result; 504 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 505 status = U_ZERO_ERROR; 506 re = uregex_openC("rx", 0, NULL, &status); 507 508 uregex_setText(re, text1, -1, &status); 509 result = uregex_find(re, 0, &status); 510 TEST_ASSERT(result == TRUE); 511 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 512 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 513 TEST_ASSERT_SUCCESS(status); 514 515 result = uregex_find(re, 9, &status); 516 TEST_ASSERT(result == TRUE); 517 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 518 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 519 TEST_ASSERT_SUCCESS(status); 520 521 result = uregex_find(re, 14, &status); 522 TEST_ASSERT(result == FALSE); 523 TEST_ASSERT_SUCCESS(status); 524 525 status = U_ZERO_ERROR; 526 uregex_reset(re, 0, &status); 527 528 result = uregex_findNext(re, &status); 529 TEST_ASSERT(result == TRUE); 530 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 531 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 532 TEST_ASSERT_SUCCESS(status); 533 534 result = uregex_findNext(re, &status); 535 TEST_ASSERT(result == TRUE); 536 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 537 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 538 TEST_ASSERT_SUCCESS(status); 539 540 status = U_ZERO_ERROR; 541 uregex_reset(re, 12, &status); 542 543 result = uregex_findNext(re, &status); 544 TEST_ASSERT(result == TRUE); 545 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 546 TEST_ASSERT(uregex_end(re, 0, &status) == 15); 547 TEST_ASSERT_SUCCESS(status); 548 549 result = uregex_findNext(re, &status); 550 TEST_ASSERT(result == FALSE); 551 TEST_ASSERT_SUCCESS(status); 552 553 uregex_close(re); 554 } 555 556 /* 557 * groupCount 558 */ 559 { 560 int32_t result; 561 562 status = U_ZERO_ERROR; 563 re = uregex_openC("abc", 0, NULL, &status); 564 result = uregex_groupCount(re, &status); 565 TEST_ASSERT_SUCCESS(status); 566 
TEST_ASSERT(result == 0); 567 uregex_close(re); 568 569 status = U_ZERO_ERROR; 570 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); 571 result = uregex_groupCount(re, &status); 572 TEST_ASSERT_SUCCESS(status); 573 TEST_ASSERT(result == 3); 574 uregex_close(re); 575 576 } 577 578 579 /* 580 * group() 581 */ 582 { 583 UChar text1[80]; 584 UChar buf[80]; 585 UBool result; 586 int32_t resultSz; 587 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 588 589 status = U_ZERO_ERROR; 590 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 591 TEST_ASSERT_SUCCESS(status); 592 593 594 uregex_setText(re, text1, -1, &status); 595 result = uregex_find(re, 0, &status); 596 TEST_ASSERT(result==TRUE); 597 598 /* Capture Group 0, the full match. Should succeed. */ 599 status = U_ZERO_ERROR; 600 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); 601 TEST_ASSERT_SUCCESS(status); 602 TEST_ASSERT_STRING("abc interior def", buf, TRUE); 603 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 604 605 /* Capture group #1. Should succeed. */ 606 status = U_ZERO_ERROR; 607 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); 608 TEST_ASSERT_SUCCESS(status); 609 TEST_ASSERT_STRING(" interior ", buf, TRUE); 610 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); 611 612 /* Capture group out of range. Error. 
*/ 613 status = U_ZERO_ERROR; 614 uregex_group(re, 2, buf, sizeof(buf)/2, &status); 615 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 616 617 /* NULL buffer, pure pre-flight */ 618 status = U_ZERO_ERROR; 619 resultSz = uregex_group(re, 0, NULL, 0, &status); 620 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 621 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 622 623 /* Too small buffer, truncated string */ 624 status = U_ZERO_ERROR; 625 memset(buf, -1, sizeof(buf)); 626 resultSz = uregex_group(re, 0, buf, 5, &status); 627 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 628 TEST_ASSERT_STRING("abc i", buf, FALSE); 629 TEST_ASSERT(buf[5] == (UChar)0xffff); 630 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 631 632 /* Output string just fits buffer, no NUL term. */ 633 status = U_ZERO_ERROR; 634 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); 635 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 636 TEST_ASSERT_STRING("abc interior def", buf, FALSE); 637 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); 638 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); 639 640 uregex_close(re); 641 642 } 643 644 /* 645 * Regions 646 */ 647 648 649 /* SetRegion(), getRegion() do something */ 650 TEST_SETUP(".*", "0123456789ABCDEF", 0) 651 UChar resultString[40]; 652 TEST_ASSERT(uregex_regionStart(re, &status) == 0); 653 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); 654 uregex_setRegion(re, 3, 6, &status); 655 TEST_ASSERT(uregex_regionStart(re, &status) == 3); 656 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); 657 TEST_ASSERT(uregex_findNext(re, &status)); 658 TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3) 659 TEST_ASSERT_STRING("345", resultString, TRUE); 660 TEST_TEARDOWN; 661 662 /* find(start=-1) uses regions */ 663 TEST_SETUP(".*", "0123456789ABCDEF", 0); 664 uregex_setRegion(re, 4, 6, &status); 665 TEST_ASSERT(uregex_find(re, -1, 
&status) == TRUE); 666 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 667 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 668 TEST_TEARDOWN; 669 670 /* find (start >=0) does not use regions */ 671 TEST_SETUP(".*", "0123456789ABCDEF", 0); 672 uregex_setRegion(re, 4, 6, &status); 673 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 674 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 675 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 676 TEST_TEARDOWN; 677 678 /* findNext() obeys regions */ 679 TEST_SETUP(".", "0123456789ABCDEF", 0); 680 uregex_setRegion(re, 4, 6, &status); 681 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); 682 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 683 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); 684 TEST_ASSERT(uregex_start(re, 0, &status) == 5); 685 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); 686 TEST_TEARDOWN; 687 688 /* matches(start=-1) uses regions */ 689 /* Also, verify that non-greedy *? succeeds in finding the full match. */ 690 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 691 uregex_setRegion(re, 4, 6, &status); 692 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); 693 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 694 TEST_ASSERT(uregex_end(re, 0, &status) == 6); 695 TEST_TEARDOWN; 696 697 /* matches (start >=0) does not use regions */ 698 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 699 uregex_setRegion(re, 4, 6, &status); 700 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); 701 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 702 TEST_ASSERT(uregex_end(re, 0, &status) == 16); 703 TEST_TEARDOWN; 704 705 /* lookingAt(start=-1) uses regions */ 706 /* Also, verify that non-greedy *? finds the first (shortest) match. 
*/ 707 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 708 uregex_setRegion(re, 4, 6, &status); 709 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); 710 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 711 TEST_ASSERT(uregex_end(re, 0, &status) == 4); 712 TEST_TEARDOWN; 713 714 /* lookingAt (start >=0) does not use regions */ 715 TEST_SETUP(".*?", "0123456789ABCDEF", 0); 716 uregex_setRegion(re, 4, 6, &status); 717 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); 718 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 719 TEST_ASSERT(uregex_end(re, 0, &status) == 0); 720 TEST_TEARDOWN; 721 722 /* hitEnd() */ 723 TEST_SETUP("[a-f]*", "abcdefghij", 0); 724 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 725 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); 726 TEST_TEARDOWN; 727 728 TEST_SETUP("[a-f]*", "abcdef", 0); 729 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 730 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); 731 TEST_TEARDOWN; 732 733 /* requireEnd */ 734 TEST_SETUP("abcd", "abcd", 0); 735 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 736 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); 737 TEST_TEARDOWN; 738 739 TEST_SETUP("abcd$", "abcd", 0); 740 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); 741 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); 742 TEST_TEARDOWN; 743 744 /* anchoringBounds */ 745 TEST_SETUP("abc$", "abcdef", 0); 746 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); 747 uregex_useAnchoringBounds(re, FALSE, &status); 748 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); 749 750 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); 751 uregex_useAnchoringBounds(re, TRUE, &status); 752 uregex_setRegion(re, 0, 3, &status); 753 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); 754 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 755 TEST_TEARDOWN; 756 757 /* Transparent Bounds */ 758 TEST_SETUP("abc(?=def)", "abcdef", 0); 759 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); 760 
uregex_useTransparentBounds(re, TRUE, &status); 761 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); 762 763 uregex_useTransparentBounds(re, FALSE, &status); 764 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ 765 uregex_setRegion(re, 0, 3, &status); 766 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */ 767 uregex_useTransparentBounds(re, TRUE, &status); 768 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */ 769 TEST_ASSERT(uregex_end(re, 0, &status) == 3); 770 TEST_TEARDOWN; 771 772 773 /* 774 * replaceFirst() 775 */ 776 { 777 UChar text1[80]; 778 UChar text2[80]; 779 UChar replText[80]; 780 UChar buf[80]; 781 int32_t resultSz; 782 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 783 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 784 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 785 786 status = U_ZERO_ERROR; 787 re = uregex_openC("x(.*?)x", 0, NULL, &status); 788 TEST_ASSERT_SUCCESS(status); 789 790 /* Normal case, with match */ 791 uregex_setText(re, text1, -1, &status); 792 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 793 TEST_ASSERT_SUCCESS(status); 794 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); 795 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 796 797 /* No match. Text should copy to output with no changes. */ 798 status = U_ZERO_ERROR; 799 uregex_setText(re, text2, -1, &status); 800 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); 801 TEST_ASSERT_SUCCESS(status); 802 TEST_ASSERT_STRING("No match here.", buf, TRUE); 803 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); 804 805 /* Match, output just fills buffer, no termination warning. 
*/ 806 status = U_ZERO_ERROR; 807 uregex_setText(re, text1, -1, &status); 808 memset(buf, -1, sizeof(buf)); 809 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 810 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 811 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 812 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 813 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 814 815 /* Do the replaceFirst again, without first resetting anything. 816 * Should give the same results. 817 */ 818 status = U_ZERO_ERROR; 819 memset(buf, -1, sizeof(buf)); 820 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); 821 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 822 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); 823 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 824 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 825 826 /* NULL buffer, zero buffer length */ 827 status = U_ZERO_ERROR; 828 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); 829 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 830 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 831 832 /* Buffer too small by one */ 833 status = U_ZERO_ERROR; 834 memset(buf, -1, sizeof(buf)); 835 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); 836 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 837 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); 838 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); 839 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); 840 841 uregex_close(re); 842 } 843 844 845 /* 846 * replaceAll() 847 */ 848 { 849 UChar text1[80]; /* "Replace xaax x1x x...x." 
*/ 850 UChar text2[80]; /* "No match Here" */ 851 UChar replText[80]; /* "<$1>" */ 852 UChar replText2[80]; /* "<<$1>>" */ 853 const char * pattern = "x(.*?)x"; 854 const char * expectedResult = "Replace <aa> <1> <...>."; 855 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; 856 UChar buf[80]; 857 int32_t resultSize; 858 int32_t expectedResultSize; 859 int32_t expectedResultSize2; 860 int32_t i; 861 862 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 863 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 864 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); 865 u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); 866 expectedResultSize = strlen(expectedResult); 867 expectedResultSize2 = strlen(expectedResult2); 868 869 status = U_ZERO_ERROR; 870 re = uregex_openC(pattern, 0, NULL, &status); 871 TEST_ASSERT_SUCCESS(status); 872 873 /* Normal case, with match */ 874 uregex_setText(re, text1, -1, &status); 875 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 876 TEST_ASSERT_SUCCESS(status); 877 TEST_ASSERT_STRING(expectedResult, buf, TRUE); 878 TEST_ASSERT(resultSize == expectedResultSize); 879 880 /* No match. Text should copy to output with no changes. */ 881 status = U_ZERO_ERROR; 882 uregex_setText(re, text2, -1, &status); 883 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); 884 TEST_ASSERT_SUCCESS(status); 885 TEST_ASSERT_STRING("No match here.", buf, TRUE); 886 TEST_ASSERT(resultSize == u_strlen(text2)); 887 888 /* Match, output just fills buffer, no termination warning. 
*/ 889 status = U_ZERO_ERROR; 890 uregex_setText(re, text1, -1, &status); 891 memset(buf, -1, sizeof(buf)); 892 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status); 893 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 894 TEST_ASSERT_STRING(expectedResult, buf, FALSE); 895 TEST_ASSERT(resultSize == expectedResultSize); 896 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 897 898 /* Do the replaceFirst again, without first resetting anything. 899 * Should give the same results. 900 */ 901 status = U_ZERO_ERROR; 902 memset(buf, -1, sizeof(buf)); 903 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); 904 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 905 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); 906 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 907 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); 908 909 /* NULL buffer, zero buffer length */ 910 status = U_ZERO_ERROR; 911 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); 912 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 913 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); 914 915 /* Buffer too small. Try every size, which will tickle edge cases 916 * in uregex_appendReplacement (used by replaceAll) */ 917 for (i=0; i<expectedResultSize; i++) { 918 char expected[80]; 919 status = U_ZERO_ERROR; 920 memset(buf, -1, sizeof(buf)); 921 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); 922 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 923 strcpy(expected, expectedResult); 924 expected[i] = 0; 925 TEST_ASSERT_STRING(expected, buf, FALSE); 926 TEST_ASSERT(resultSize == expectedResultSize); 927 TEST_ASSERT(buf[i] == (UChar)0xffff); 928 } 929 930 /* Buffer too small. 
Same as previous test, except this time the replacement 931 * text is longer than the match capture group, making the length of the complete 932 * replacement longer than the original string. 933 */ 934 for (i=0; i<expectedResultSize2; i++) { 935 char expected[80]; 936 status = U_ZERO_ERROR; 937 memset(buf, -1, sizeof(buf)); 938 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); 939 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 940 strcpy(expected, expectedResult2); 941 expected[i] = 0; 942 TEST_ASSERT_STRING(expected, buf, FALSE); 943 TEST_ASSERT(resultSize == expectedResultSize2); 944 TEST_ASSERT(buf[i] == (UChar)0xffff); 945 } 946 947 948 uregex_close(re); 949 } 950 951 952 /* 953 * appendReplacement() 954 */ 955 { 956 UChar text[100]; 957 UChar repl[100]; 958 UChar buf[100]; 959 UChar *bufPtr; 960 int32_t bufCap; 961 962 963 status = U_ZERO_ERROR; 964 re = uregex_openC(".*", 0, 0, &status); 965 TEST_ASSERT_SUCCESS(status); 966 967 u_uastrncpy(text, "whatever", sizeof(text)/2); 968 u_uastrncpy(repl, "some other", sizeof(repl)/2); 969 uregex_setText(re, text, -1, &status); 970 971 /* match covers whole target string */ 972 uregex_find(re, 0, &status); 973 TEST_ASSERT_SUCCESS(status); 974 bufPtr = buf; 975 bufCap = sizeof(buf) / 2; 976 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 977 TEST_ASSERT_SUCCESS(status); 978 TEST_ASSERT_STRING("some other", buf, TRUE); 979 980 /* Match has \u \U escapes */ 981 uregex_find(re, 0, &status); 982 TEST_ASSERT_SUCCESS(status); 983 bufPtr = buf; 984 bufCap = sizeof(buf) / 2; 985 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 986 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 987 TEST_ASSERT_SUCCESS(status); 988 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 989 990 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. 
*/ 991 status = U_ZERO_ERROR; 992 uregex_find(re, 0, &status); 993 TEST_ASSERT_SUCCESS(status); 994 bufPtr = buf; 995 status = U_BUFFER_OVERFLOW_ERROR; 996 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); 997 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 998 999 uregex_close(re); 1000 } 1001 1002 1003 /* 1004 * appendTail(). Checked in ReplaceFirst(), replaceAll(). 1005 */ 1006 1007 /* 1008 * split() 1009 */ 1010 { 1011 UChar textToSplit[80]; 1012 UChar text2[80]; 1013 UChar buf[200]; 1014 UChar *fields[10]; 1015 int32_t numFields; 1016 int32_t requiredCapacity; 1017 int32_t spaceNeeded; 1018 int32_t sz; 1019 1020 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 1021 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1022 1023 status = U_ZERO_ERROR; 1024 re = uregex_openC(":", 0, NULL, &status); 1025 1026 1027 /* Simple split */ 1028 1029 uregex_setText(re, textToSplit, -1, &status); 1030 TEST_ASSERT_SUCCESS(status); 1031 1032 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1033 if (U_SUCCESS(status)) { 1034 memset(fields, -1, sizeof(fields)); 1035 numFields = 1036 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1037 TEST_ASSERT_SUCCESS(status); 1038 1039 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1040 if(U_SUCCESS(status)) { 1041 TEST_ASSERT(numFields == 3); 1042 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1043 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1044 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1045 TEST_ASSERT(fields[3] == NULL); 1046 1047 spaceNeeded = u_strlen(textToSplit) - 1048 (numFields - 1) + /* Field delimiters do not appear in output */ 1049 numFields; /* Each field gets a NUL terminator */ 1050 1051 TEST_ASSERT(spaceNeeded == requiredCapacity); 1052 } 1053 } 1054 1055 uregex_close(re); 1056 1057 1058 /* Split with too few output strings available */ 1059 status = U_ZERO_ERROR; 1060 re = uregex_openC(":", 0, NULL, &status); 1061 uregex_setText(re, textToSplit, -1, &status); 1062 TEST_ASSERT_SUCCESS(status); 1063 1064 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1065 if(U_SUCCESS(status)) { 1066 memset(fields, -1, sizeof(fields)); 1067 numFields = 1068 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1069 TEST_ASSERT_SUCCESS(status); 1070 1071 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1072 if(U_SUCCESS(status)) { 1073 TEST_ASSERT(numFields == 2); 1074 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1075 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); 1076 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1077 1078 spaceNeeded = u_strlen(textToSplit) - 1079 (numFields - 1) + /* Field delimiters do not appear in output */ 1080 numFields; /* Each field gets a NUL terminator */ 1081 1082 TEST_ASSERT(spaceNeeded == requiredCapacity); 1083 1084 /* Split with a range of output buffer sizes. 
*/ 1085 spaceNeeded = u_strlen(textToSplit) - 1086 (numFields - 1) + /* Field delimiters do not appear in output */ 1087 numFields; /* Each field gets a NUL terminator */ 1088 1089 for (sz=0; sz < spaceNeeded+1; sz++) { 1090 memset(fields, -1, sizeof(fields)); 1091 status = U_ZERO_ERROR; 1092 numFields = 1093 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); 1094 if (sz >= spaceNeeded) { 1095 TEST_ASSERT_SUCCESS(status); 1096 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1097 TEST_ASSERT_STRING(" second", fields[1], TRUE); 1098 TEST_ASSERT_STRING(" third", fields[2], TRUE); 1099 } else { 1100 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 1101 } 1102 TEST_ASSERT(numFields == 3); 1103 TEST_ASSERT(fields[3] == NULL); 1104 TEST_ASSERT(spaceNeeded == requiredCapacity); 1105 } 1106 } 1107 } 1108 1109 uregex_close(re); 1110 } 1111 1112 1113 1114 1115 /* Split(), part 2. Patterns with capture groups. The capture group text 1116 * comes out as additional fields. */ 1117 { 1118 UChar textToSplit[80]; 1119 UChar buf[200]; 1120 UChar *fields[10]; 1121 int32_t numFields; 1122 int32_t requiredCapacity; 1123 int32_t spaceNeeded; 1124 int32_t sz; 1125 1126 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 1127 1128 status = U_ZERO_ERROR; 1129 re = uregex_openC("<(.*?)>", 0, NULL, &status); 1130 1131 uregex_setText(re, textToSplit, -1, &status); 1132 TEST_ASSERT_SUCCESS(status); 1133 1134 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1135 if(U_SUCCESS(status)) { 1136 memset(fields, -1, sizeof(fields)); 1137 numFields = 1138 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); 1139 TEST_ASSERT_SUCCESS(status); 1140 1141 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1142 if(U_SUCCESS(status)) { 1143 TEST_ASSERT(numFields == 5); 1144 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1145 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1146 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1147 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1148 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1149 TEST_ASSERT(fields[5] == NULL); 1150 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1151 TEST_ASSERT(spaceNeeded == requiredCapacity); 1152 } 1153 } 1154 1155 /* Split with too few output strings available (2) */ 1156 status = U_ZERO_ERROR; 1157 memset(fields, -1, sizeof(fields)); 1158 numFields = 1159 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); 1160 TEST_ASSERT_SUCCESS(status); 1161 1162 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1163 if(U_SUCCESS(status)) { 1164 TEST_ASSERT(numFields == 2); 1165 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1166 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); 1167 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); 1168 1169 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ 1170 TEST_ASSERT(spaceNeeded == requiredCapacity); 1171 } 1172 1173 /* Split with too few output strings available (3) */ 1174 status = U_ZERO_ERROR; 1175 memset(fields, -1, sizeof(fields)); 1176 numFields = 1177 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); 1178 TEST_ASSERT_SUCCESS(status); 1179 1180 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1181 if(U_SUCCESS(status)) { 1182 TEST_ASSERT(numFields == 3); 1183 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1184 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1185 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); 1186 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); 1187 1188 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." 
at NUL positions */ 1189 TEST_ASSERT(spaceNeeded == requiredCapacity); 1190 } 1191 1192 /* Split with just enough output strings available (5) */ 1193 status = U_ZERO_ERROR; 1194 memset(fields, -1, sizeof(fields)); 1195 numFields = 1196 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); 1197 TEST_ASSERT_SUCCESS(status); 1198 1199 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1200 if(U_SUCCESS(status)) { 1201 TEST_ASSERT(numFields == 5); 1202 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1203 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1204 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1205 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1206 TEST_ASSERT_STRING(" third", fields[4], TRUE); 1207 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); 1208 1209 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ 1210 TEST_ASSERT(spaceNeeded == requiredCapacity); 1211 } 1212 1213 /* Split, end of text is a field delimiter. */ 1214 status = U_ZERO_ERROR; 1215 sz = strlen("first <tag-a> second<tag-b>"); 1216 uregex_setText(re, textToSplit, sz, &status); 1217 TEST_ASSERT_SUCCESS(status); 1218 1219 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1220 if(U_SUCCESS(status)) { 1221 memset(fields, -1, sizeof(fields)); 1222 numFields = 1223 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); 1224 TEST_ASSERT_SUCCESS(status); 1225 1226 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1227 if(U_SUCCESS(status)) { 1228 TEST_ASSERT(numFields == 5); 1229 TEST_ASSERT_STRING("first ", fields[0], TRUE); 1230 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); 1231 TEST_ASSERT_STRING(" second", fields[2], TRUE); 1232 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); 1233 TEST_ASSERT_STRING("", fields[4], TRUE); 1234 TEST_ASSERT(fields[5] == NULL); 1235 TEST_ASSERT(fields[8] == NULL); 1236 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); 1237 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */ 1238 TEST_ASSERT(spaceNeeded == requiredCapacity); 1239 } 1240 } 1241 1242 uregex_close(re); 1243 } 1244 1245 /* 1246 * set/getTimeLimit 1247 */ 1248 TEST_SETUP("abc$", "abcdef", 0); 1249 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); 1250 uregex_setTimeLimit(re, 1000, &status); 1251 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1252 TEST_ASSERT_SUCCESS(status); 1253 uregex_setTimeLimit(re, -1, &status); 1254 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1255 status = U_ZERO_ERROR; 1256 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); 1257 TEST_TEARDOWN; 1258 1259 /* 1260 * set/get Stack Limit 1261 */ 1262 TEST_SETUP("abc$", "abcdef", 0); 1263 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); 1264 uregex_setStackLimit(re, 40000, &status); 1265 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1266 TEST_ASSERT_SUCCESS(status); 1267 uregex_setStackLimit(re, -1, &status); 1268 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1269 status = U_ZERO_ERROR; 1270 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); 1271 TEST_TEARDOWN; 1272 1273 1274 /* 1275 * Get/Set callback functions 1276 * This test is copied from intltest regex/Callbacks 1277 * The pattern and test data will run long enough to cause the callback 1278 * to be invoked. The nested '+' operators give exponential time 1279 * behavior with increasing string length. 
1280 */ 1281 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) 1282 callBackContext cbInfo = {4, 0, 0}; 1283 const void *pContext = &cbInfo; 1284 URegexMatchCallback *returnedFn = &TestCallbackFn; 1285 1286 /* Getting the callback fn when it hasn't been set must return NULL */ 1287 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1288 TEST_ASSERT_SUCCESS(status); 1289 TEST_ASSERT(returnedFn == NULL); 1290 TEST_ASSERT(pContext == NULL); 1291 1292 /* Set thecallback and do a match. */ 1293 /* The callback function should record that it has been called. */ 1294 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); 1295 TEST_ASSERT_SUCCESS(status); 1296 TEST_ASSERT(cbInfo.numCalls == 0); 1297 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); 1298 TEST_ASSERT_SUCCESS(status); 1299 TEST_ASSERT(cbInfo.numCalls > 0); 1300 1301 /* Getting the callback should return the values that were set above. */ 1302 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); 1303 TEST_ASSERT(returnedFn == &TestCallbackFn); 1304 TEST_ASSERT(pContext == &cbInfo); 1305 1306 TEST_TEARDOWN; 1307} 1308 1309 1310
/*
 * TestBug4315
 *
 * Exercises uregex_split() in two passes over the same text:
 *   1. With a NULL destination buffer of capacity 0.  The call is expected
 *      to report U_BUFFER_OVERFLOW_ERROR, to count three fields, and to
 *      store the capacity it needs in neededLength1.
 *   2. With a heap buffer of neededLength1+1 UChars.  The call is expected
 *      to succeed, to report the same required capacity, and to leave the
 *      three fields (plus a NULL entry after the last one) in destFields.
 *
 * Failures are reported through the test logging macros; the function
 * itself takes no arguments and returns nothing.
 */
static void TestBug4315(void) {
    UErrorCode          theICUError = U_ZERO_ERROR;
    URegularExpression *theRegEx;
    UChar              *textBuff;
    const char         *thePattern;
    UChar               theString[100];
    UChar              *destFields[24];
    /* Zero-initialised so the values are defined even if a split call bails out early. */
    int32_t             neededLength1 = 0;
    int32_t             neededLength2 = 0;

    int32_t             wordCount = 0;
    int32_t             destFieldsSize = 24;

    thePattern = "ck ";
    u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");

    /* open a regex */
    theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
    TEST_ASSERT_SUCCESS(theICUError);

    /* set the input string */
    uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
    TEST_ASSERT_SUCCESS(theICUError);

    /* split */
    /* Explicitly pass NULL and 0 to force the overflow error -> this is where
     * the error occurs! */
    wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
                             destFieldsSize, &theICUError);

    TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
    TEST_ASSERT(wordCount==3);

    if (theICUError == U_BUFFER_OVERFLOW_ERROR)
    {
        theICUError = U_ZERO_ERROR;
        textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
        if (textBuff == NULL) {
            /* Do not hand a NULL buffer with a non-zero capacity to uregex_split(). */
            log_err("Failure at file %s, line %d, malloc failed\n", __FILE__, __LINE__);
        } else {
            wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
                                     destFields, destFieldsSize, &theICUError);
            TEST_ASSERT(wordCount==3);
            TEST_ASSERT_SUCCESS(theICUError);
            TEST_ASSERT(neededLength1 == neededLength2);
            TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
            TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
            TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
            TEST_ASSERT(destFields[3] == NULL);
            free(textBuff);
        }
    }
    uregex_close(theRegEx);
}
1361 1362/* Based on TestRegexCAPI() */ 1363static void TestUTextAPI(void) { 1364 UErrorCode status = U_ZERO_ERROR; 1365 URegularExpression *re; 1366 UText patternText = UTEXT_INITIALIZER; 1367 UChar pat[200]; 1368 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; 1369 1370 /* Mimimalist open/close */ 1371 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); 1372 re = uregex_openUText(&patternText, 0, 0, &status); 1373 if (U_FAILURE(status)) { 1374 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); 1375 utext_close(&patternText); 1376 return; 1377 } 1378 uregex_close(re); 1379 1380 /* Open with all flag values set */ 1381 status = U_ZERO_ERROR; 1382 re = uregex_openUText(&patternText, 1383 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 1384 0, &status); 1385 TEST_ASSERT_SUCCESS(status); 1386 uregex_close(re); 1387 1388 /* Open with an invalid flag */ 1389 status = U_ZERO_ERROR; 1390 re =
uregex_openUText(&patternText, 0x40000000, 0, &status); 1391 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); 1392 uregex_close(re); 1393 1394 /* open with an invalid parameter */ 1395 status = U_ZERO_ERROR; 1396 re = uregex_openUText(NULL, 1397 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status); 1398 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); 1399 1400 /* 1401 * clone 1402 */ 1403 { 1404 URegularExpression *clone1; 1405 URegularExpression *clone2; 1406 URegularExpression *clone3; 1407 UChar testString1[30]; 1408 UChar testString2[30]; 1409 UBool result; 1410 1411 1412 status = U_ZERO_ERROR; 1413 re = uregex_openUText(&patternText, 0, 0, &status); 1414 TEST_ASSERT_SUCCESS(status); 1415 clone1 = uregex_clone(re, &status); 1416 TEST_ASSERT_SUCCESS(status); 1417 TEST_ASSERT(clone1 != NULL); 1418 1419 status = U_ZERO_ERROR; 1420 clone2 = uregex_clone(re, &status); 1421 TEST_ASSERT_SUCCESS(status); 1422 TEST_ASSERT(clone2 != NULL); 1423 uregex_close(re); 1424 1425 status = U_ZERO_ERROR; 1426 clone3 = uregex_clone(clone2, &status); 1427 TEST_ASSERT_SUCCESS(status); 1428 TEST_ASSERT(clone3 != NULL); 1429 1430 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); 1431 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); 1432 1433 status = U_ZERO_ERROR; 1434 uregex_setText(clone1, testString1, -1, &status); 1435 TEST_ASSERT_SUCCESS(status); 1436 result = uregex_lookingAt(clone1, 0, &status); 1437 TEST_ASSERT_SUCCESS(status); 1438 TEST_ASSERT(result==TRUE); 1439 1440 status = U_ZERO_ERROR; 1441 uregex_setText(clone2, testString2, -1, &status); 1442 TEST_ASSERT_SUCCESS(status); 1443 result = uregex_lookingAt(clone2, 0, &status); 1444 TEST_ASSERT_SUCCESS(status); 1445 TEST_ASSERT(result==FALSE); 1446 result = uregex_find(clone2, 0, &status); 1447 TEST_ASSERT_SUCCESS(status); 1448 TEST_ASSERT(result==TRUE); 1449 1450 uregex_close(clone1); 1451 uregex_close(clone2); 1452 uregex_close(clone3); 1453 1454 } 1455 
1456 /* 1457 * pattern() and patternText() 1458 */ 1459 { 1460 const UChar *resultPat; 1461 int32_t resultLen; 1462 UText *resultText; 1463 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ 1464 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ 1465 u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */ 1466 status = U_ZERO_ERROR; 1467 1468 utext_openUTF8(&patternText, str_hello, -1, &status); 1469 re = uregex_open(pat, -1, 0, NULL, &status); 1470 resultPat = uregex_pattern(re, &resultLen, &status); 1471 TEST_ASSERT_SUCCESS(status); 1472 1473 /* The TEST_ASSERT_SUCCESS above should change too... */ 1474 if (U_SUCCESS(status)) { 1475 TEST_ASSERT(resultLen == -1); 1476 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); 1477 } 1478 1479 resultText = uregex_patternUText(re, &status); 1480 TEST_ASSERT_SUCCESS(status); 1481 TEST_ASSERT_UTEXT(str_hello, resultText); 1482 1483 uregex_close(re); 1484 1485 status = U_ZERO_ERROR; 1486 re = uregex_open(pat, 3, 0, NULL, &status); 1487 resultPat = uregex_pattern(re, &resultLen, &status); 1488 TEST_ASSERT_SUCCESS(status); 1489 1490 /* The TEST_ASSERT_SUCCESS above should change too... 
*/ 1491 if (U_SUCCESS(status)) { 1492 TEST_ASSERT(resultLen == 3); 1493 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); 1494 TEST_ASSERT(u_strlen(resultPat) == 3); 1495 } 1496 1497 resultText = uregex_patternUText(re, &status); 1498 TEST_ASSERT_SUCCESS(status); 1499 TEST_ASSERT_UTEXT(str_hel, resultText); 1500 1501 uregex_close(re); 1502 } 1503 1504 /* 1505 * setUText() and lookingAt() 1506 */ 1507 { 1508 UText text1 = UTEXT_INITIALIZER; 1509 UText text2 = UTEXT_INITIALIZER; 1510 UBool result; 1511 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1512 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1513 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1514 status = U_ZERO_ERROR; 1515 utext_openUTF8(&text1, str_abcccd, -1, &status); 1516 utext_openUTF8(&text2, str_abcccxd, -1, &status); 1517 1518 utext_openUTF8(&patternText, str_abcd, -1, &status); 1519 re = uregex_openUText(&patternText, 0, NULL, &status); 1520 TEST_ASSERT_SUCCESS(status); 1521 1522 /* Operation before doing a setText should fail... 
*/ 1523 status = U_ZERO_ERROR; 1524 uregex_lookingAt(re, 0, &status); 1525 TEST_ASSERT( status== U_REGEX_INVALID_STATE); 1526 1527 status = U_ZERO_ERROR; 1528 uregex_setUText(re, &text1, &status); 1529 result = uregex_lookingAt(re, 0, &status); 1530 TEST_ASSERT(result == TRUE); 1531 TEST_ASSERT_SUCCESS(status); 1532 1533 status = U_ZERO_ERROR; 1534 uregex_setUText(re, &text2, &status); 1535 result = uregex_lookingAt(re, 0, &status); 1536 TEST_ASSERT(result == FALSE); 1537 TEST_ASSERT_SUCCESS(status); 1538 1539 status = U_ZERO_ERROR; 1540 uregex_setUText(re, &text1, &status); 1541 result = uregex_lookingAt(re, 0, &status); 1542 TEST_ASSERT(result == TRUE); 1543 TEST_ASSERT_SUCCESS(status); 1544 1545 uregex_close(re); 1546 utext_close(&text1); 1547 utext_close(&text2); 1548 } 1549 1550 1551 /* 1552 * getText() and getUText() 1553 */ 1554 { 1555 UText text1 = UTEXT_INITIALIZER; 1556 UText text2 = UTEXT_INITIALIZER; 1557 UChar text2Chars[20]; 1558 UText *resultText; 1559 const UChar *result; 1560 int32_t textLength; 1561 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */ 1562 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */ 1563 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */ 1564 1565 1566 status = U_ZERO_ERROR; 1567 utext_openUTF8(&text1, str_abcccd, -1, &status); 1568 u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2); 1569 utext_openUChars(&text2, text2Chars, -1, &status); 1570 1571 utext_openUTF8(&patternText, str_abcd, -1, &status); 1572 re = uregex_openUText(&patternText, 0, NULL, &status); 1573 1574 /* First set a UText */ 1575 uregex_setUText(re, &text1, &status); 1576 resultText = uregex_getUText(re, NULL, &status); 1577 TEST_ASSERT_SUCCESS(status); 1578 TEST_ASSERT(resultText != &text1); 1579 utext_setNativeIndex(resultText, 0); 1580 utext_setNativeIndex(&text1, 0); 1581 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); 1582 
utext_close(resultText); 1583 1584 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */ 1585 TEST_ASSERT(textLength == -1 || textLength == 6); 1586 resultText = uregex_getUText(re, NULL, &status); 1587 TEST_ASSERT_SUCCESS(status); 1588 TEST_ASSERT(resultText != &text1); 1589 utext_setNativeIndex(resultText, 0); 1590 utext_setNativeIndex(&text1, 0); 1591 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); 1592 utext_close(resultText); 1593 1594 /* Then set a UChar * */ 1595 uregex_setText(re, text2Chars, 7, &status); 1596 resultText = uregex_getUText(re, NULL, &status); 1597 TEST_ASSERT_SUCCESS(status); 1598 utext_setNativeIndex(resultText, 0); 1599 utext_setNativeIndex(&text2, 0); 1600 TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0); 1601 utext_close(resultText); 1602 result = uregex_getText(re, &textLength, &status); 1603 TEST_ASSERT(textLength == 7); 1604 1605 uregex_close(re); 1606 utext_close(&text1); 1607 utext_close(&text2); 1608 } 1609 1610 /* 1611 * matches() 1612 */ 1613 { 1614 UText text1 = UTEXT_INITIALIZER; 1615 UBool result; 1616 UText nullText = UTEXT_INITIALIZER; 1617 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */ 1618 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */ 1619 1620 status = U_ZERO_ERROR; 1621 utext_openUTF8(&text1, str_abcccde, -1, &status); 1622 utext_openUTF8(&patternText, str_abcd, -1, &status); 1623 re = uregex_openUText(&patternText, 0, NULL, &status); 1624 1625 uregex_setUText(re, &text1, &status); 1626 result = uregex_matches(re, 0, &status); 1627 TEST_ASSERT(result == FALSE); 1628 TEST_ASSERT_SUCCESS(status); 1629 uregex_close(re); 1630 1631 status = U_ZERO_ERROR; 1632 re = uregex_openC(".?", 0, NULL, &status); 1633 uregex_setUText(re, &text1, &status); 1634 result = uregex_matches(re, 7, &status); 1635 TEST_ASSERT(result == TRUE); 1636 TEST_ASSERT_SUCCESS(status); 1637 1638 status = U_ZERO_ERROR; 1639 
utext_openUTF8(&nullText, "", -1, &status); 1640 uregex_setUText(re, &nullText, &status); 1641 TEST_ASSERT_SUCCESS(status); 1642 result = uregex_matches(re, 0, &status); 1643 TEST_ASSERT(result == TRUE); 1644 TEST_ASSERT_SUCCESS(status); 1645 1646 uregex_close(re); 1647 utext_close(&text1); 1648 utext_close(&nullText); 1649 } 1650 1651 1652 /* 1653 * lookingAt() Used in setText test. 1654 */ 1655 1656 1657 /* 1658 * find(), findNext, start, end, reset 1659 */ 1660 { 1661 UChar text1[50]; 1662 UBool result; 1663 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); 1664 status = U_ZERO_ERROR; 1665 re = uregex_openC("rx", 0, NULL, &status); 1666 1667 uregex_setText(re, text1, -1, &status); 1668 result = uregex_find(re, 0, &status); 1669 TEST_ASSERT(result == TRUE); 1670 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1671 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1672 TEST_ASSERT_SUCCESS(status); 1673 1674 result = uregex_find(re, 9, &status); 1675 TEST_ASSERT(result == TRUE); 1676 TEST_ASSERT(uregex_start(re, 0, &status) == 11); 1677 TEST_ASSERT(uregex_end(re, 0, &status) == 13); 1678 TEST_ASSERT_SUCCESS(status); 1679 1680 result = uregex_find(re, 14, &status); 1681 TEST_ASSERT(result == FALSE); 1682 TEST_ASSERT_SUCCESS(status); 1683 1684 status = U_ZERO_ERROR; 1685 uregex_reset(re, 0, &status); 1686 1687 result = uregex_findNext(re, &status); 1688 TEST_ASSERT(result == TRUE); 1689 TEST_ASSERT(uregex_start(re, 0, &status) == 3); 1690 TEST_ASSERT(uregex_end(re, 0, &status) == 5); 1691 TEST_ASSERT_SUCCESS(status); 1692 1693 result = uregex_findNext(re, &status); 1694 TEST_ASSERT(result == TRUE); 1695 TEST_ASSERT(uregex_start(re, 0, &status) == 6); 1696 TEST_ASSERT(uregex_end(re, 0, &status) == 8); 1697 TEST_ASSERT_SUCCESS(status); 1698 1699 status = U_ZERO_ERROR; 1700 uregex_reset(re, 12, &status); 1701 1702 result = uregex_findNext(re, &status); 1703 TEST_ASSERT(result == TRUE); 1704 TEST_ASSERT(uregex_start(re, 0, &status) == 13); 1705 
TEST_ASSERT(uregex_end(re, 0, &status) == 15); 1706 TEST_ASSERT_SUCCESS(status); 1707 1708 result = uregex_findNext(re, &status); 1709 TEST_ASSERT(result == FALSE); 1710 TEST_ASSERT_SUCCESS(status); 1711 1712 uregex_close(re); 1713 } 1714 1715 /* 1716 * group() 1717 */ 1718 { 1719 UChar text1[80]; 1720 UText *actual; 1721 UBool result; 1722 1723 const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */ 1724 const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */ 1725 1726 1727 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); 1728 1729 status = U_ZERO_ERROR; 1730 re = uregex_openC("abc(.*?)def", 0, NULL, &status); 1731 TEST_ASSERT_SUCCESS(status); 1732 1733 uregex_setText(re, text1, -1, &status); 1734 result = uregex_find(re, 0, &status); 1735 TEST_ASSERT(result==TRUE); 1736 1737 /* Capture Group 0, the full match. Should succeed. */ 1738 status = U_ZERO_ERROR; 1739 actual = uregex_groupUTextDeep(re, 0, NULL, &status); 1740 TEST_ASSERT_SUCCESS(status); 1741 TEST_ASSERT_UTEXT(str_abcinteriordef, actual); 1742 utext_close(actual); 1743 1744 /* Capture Group 0 with shallow clone API. Should succeed. */ 1745 status = U_ZERO_ERROR; 1746 { 1747 int64_t group_len; 1748 int32_t len16; 1749 UErrorCode shallowStatus = U_ZERO_ERROR; 1750 int64_t nativeIndex; 1751 UChar *groupChars; 1752 UText groupText = UTEXT_INITIALIZER; 1753 1754 actual = uregex_groupUText(re, 0, NULL, &group_len, &status); 1755 TEST_ASSERT_SUCCESS(status); 1756 1757 nativeIndex = utext_getNativeIndex(actual); 1758 /* Following returns U_INDEX_OUTOFBOUNDS_ERROR... 
looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */ 1759 /* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */ 1760 len16 = (int32_t)group_len; 1761 1762 groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1)); 1763 utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus); 1764 1765 utext_openUChars(&groupText, groupChars, len16, &shallowStatus); 1766 1767 TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText); 1768 utext_close(&groupText); 1769 free(groupChars); 1770 } 1771 utext_close(actual); 1772 1773 /* Capture group #1. Should succeed. */ 1774 status = U_ZERO_ERROR; 1775 actual = uregex_groupUTextDeep(re, 1, NULL, &status); 1776 TEST_ASSERT_SUCCESS(status); 1777 TEST_ASSERT_UTEXT(str_interior, actual); 1778 utext_close(actual); 1779 1780 /* Capture group out of range. Error. */ 1781 status = U_ZERO_ERROR; 1782 actual = uregex_groupUTextDeep(re, 2, NULL, &status); 1783 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1784 TEST_ASSERT(utext_nativeLength(actual) == 0); 1785 utext_close(actual); 1786 1787 uregex_close(re); 1788 1789 } 1790 1791 /* 1792 * replaceFirst() 1793 */ 1794 { 1795 UChar text1[80]; 1796 UChar text2[80]; 1797 UText replText = UTEXT_INITIALIZER; 1798 UText *result; 1799 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ 1800 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. 
*/ 1801 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */ 1802 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1803 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */ 1804 status = U_ZERO_ERROR; 1805 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 1806 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1807 utext_openUTF8(&replText, str_1x, -1, &status); 1808 1809 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1810 TEST_ASSERT_SUCCESS(status); 1811 1812 /* Normal case, with match */ 1813 uregex_setText(re, text1, -1, &status); 1814 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1815 TEST_ASSERT_SUCCESS(status); 1816 TEST_ASSERT_UTEXT(str_Replxxx, result); 1817 utext_close(result); 1818 1819 /* No match. Text should copy to output with no changes. 
*/ 1820 uregex_setText(re, text2, -1, &status); 1821 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1822 TEST_ASSERT_SUCCESS(status); 1823 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1824 utext_close(result); 1825 1826 /* Unicode escapes */ 1827 uregex_setText(re, text1, -1, &status); 1828 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); 1829 result = uregex_replaceFirstUText(re, &replText, NULL, &status); 1830 TEST_ASSERT_SUCCESS(status); 1831 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); 1832 utext_close(result); 1833 1834 uregex_close(re); 1835 utext_close(&replText); 1836 } 1837 1838 1839 /* 1840 * replaceAll() 1841 */ 1842 { 1843 UChar text1[80]; 1844 UChar text2[80]; 1845 UText replText = UTEXT_INITIALIZER; 1846 UText *result; 1847 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ 1848 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ 1849 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ 1850 status = U_ZERO_ERROR; 1851 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 1852 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1853 utext_openUTF8(&replText, str_1, -1, &status); 1854 1855 re = uregex_openC("x(.*?)x", 0, NULL, &status); 1856 TEST_ASSERT_SUCCESS(status); 1857 1858 /* Normal case, with match */ 1859 uregex_setText(re, text1, -1, &status); 1860 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1861 TEST_ASSERT_SUCCESS(status); 1862 TEST_ASSERT_UTEXT(str_Replaceaa1, result); 1863 utext_close(result); 1864 1865 /* No match. Text should copy to output with no changes. 
*/ 1866 uregex_setText(re, text2, -1, &status); 1867 result = uregex_replaceAllUText(re, &replText, NULL, &status); 1868 TEST_ASSERT_SUCCESS(status); 1869 TEST_ASSERT_UTEXT(str_Nomatchhere, result); 1870 utext_close(result); 1871 1872 uregex_close(re); 1873 utext_close(&replText); 1874 } 1875 1876 1877 /* 1878 * appendReplacement() 1879 */ 1880 { 1881 UChar text[100]; 1882 UChar repl[100]; 1883 UChar buf[100]; 1884 UChar *bufPtr; 1885 int32_t bufCap; 1886 1887 status = U_ZERO_ERROR; 1888 re = uregex_openC(".*", 0, 0, &status); 1889 TEST_ASSERT_SUCCESS(status); 1890 1891 u_uastrncpy(text, "whatever", sizeof(text)/2); 1892 u_uastrncpy(repl, "some other", sizeof(repl)/2); 1893 uregex_setText(re, text, -1, &status); 1894 1895 /* match covers whole target string */ 1896 uregex_find(re, 0, &status); 1897 TEST_ASSERT_SUCCESS(status); 1898 bufPtr = buf; 1899 bufCap = sizeof(buf) / 2; 1900 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1901 TEST_ASSERT_SUCCESS(status); 1902 TEST_ASSERT_STRING("some other", buf, TRUE); 1903 1904 /* Match has \u \U escapes */ 1905 uregex_find(re, 0, &status); 1906 TEST_ASSERT_SUCCESS(status); 1907 bufPtr = buf; 1908 bufCap = sizeof(buf) / 2; 1909 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); 1910 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); 1911 TEST_ASSERT_SUCCESS(status); 1912 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); 1913 1914 uregex_close(re); 1915 } 1916 1917 1918 /* 1919 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(). 
1920 */ 1921 1922 /* 1923 * splitUText() 1924 */ 1925 { 1926 UChar textToSplit[80]; 1927 UChar text2[80]; 1928 UText *fields[10]; 1929 int32_t numFields; 1930 int32_t i; 1931 1932 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); 1933 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 1934 1935 status = U_ZERO_ERROR; 1936 re = uregex_openC(":", 0, NULL, &status); 1937 1938 1939 /* Simple split */ 1940 1941 uregex_setText(re, textToSplit, -1, &status); 1942 TEST_ASSERT_SUCCESS(status); 1943 1944 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1945 if (U_SUCCESS(status)) { 1946 memset(fields, 0, sizeof(fields)); 1947 numFields = uregex_splitUText(re, fields, 10, &status); 1948 TEST_ASSERT_SUCCESS(status); 1949 1950 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1951 if(U_SUCCESS(status)) { 1952 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */ 1953 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */ 1954 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */ 1955 TEST_ASSERT(numFields == 3); 1956 TEST_ASSERT_UTEXT(str_first, fields[0]); 1957 TEST_ASSERT_UTEXT(str_second, fields[1]); 1958 TEST_ASSERT_UTEXT(str_third, fields[2]); 1959 TEST_ASSERT(fields[3] == NULL); 1960 } 1961 for(i = 0; i < numFields; i++) { 1962 utext_close(fields[i]); 1963 } 1964 } 1965 1966 uregex_close(re); 1967 1968 1969 /* Split with too few output strings available */ 1970 status = U_ZERO_ERROR; 1971 re = uregex_openC(":", 0, NULL, &status); 1972 uregex_setText(re, textToSplit, -1, &status); 1973 TEST_ASSERT_SUCCESS(status); 1974 1975 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 1976 if(U_SUCCESS(status)) { 1977 fields[0] = NULL; 1978 fields[1] = NULL; 1979 fields[2] = &patternText; 1980 numFields = uregex_splitUText(re, fields, 2, &status); 1981 TEST_ASSERT_SUCCESS(status); 1982 1983 /* The TEST_ASSERT_SUCCESS call above should change too... */ 1984 if(U_SUCCESS(status)) { 1985 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 1986 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */ 1987 TEST_ASSERT(numFields == 2); 1988 TEST_ASSERT_UTEXT(str_first, fields[0]); 1989 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); 1990 TEST_ASSERT(fields[2] == &patternText); 1991 } 1992 for(i = 0; i < numFields; i++) { 1993 utext_close(fields[i]); 1994 } 1995 } 1996 1997 uregex_close(re); 1998 } 1999 2000 /* splitUText(), part 2. Patterns with capture groups. The capture group text 2001 * comes out as additional fields. */ 2002 { 2003 UChar textToSplit[80]; 2004 UText *fields[10]; 2005 int32_t numFields; 2006 int32_t i; 2007 2008 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); 2009 2010 status = U_ZERO_ERROR; 2011 re = uregex_openC("<(.*?)>", 0, NULL, &status); 2012 2013 uregex_setText(re, textToSplit, -1, &status); 2014 TEST_ASSERT_SUCCESS(status); 2015 2016 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2017 if(U_SUCCESS(status)) { 2018 memset(fields, 0, sizeof(fields)); 2019 numFields = uregex_splitUText(re, fields, 10, &status); 2020 TEST_ASSERT_SUCCESS(status); 2021 2022 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2023 if(U_SUCCESS(status)) { 2024 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2025 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2026 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2027 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2028 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2029 2030 TEST_ASSERT(numFields == 5); 2031 TEST_ASSERT_UTEXT(str_first, fields[0]); 2032 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2033 TEST_ASSERT_UTEXT(str_second, fields[2]); 2034 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2035 TEST_ASSERT_UTEXT(str_third, fields[4]); 2036 TEST_ASSERT(fields[5] == NULL); 2037 } 2038 for(i = 0; i < numFields; i++) { 2039 utext_close(fields[i]); 2040 } 2041 } 2042 2043 /* Split with too few output strings available (2) */ 2044 status = U_ZERO_ERROR; 2045 fields[0] = NULL; 2046 fields[1] = NULL; 2047 fields[2] = &patternText; 2048 numFields = uregex_splitUText(re, fields, 2, &status); 2049 TEST_ASSERT_SUCCESS(status); 2050 2051 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2052 if(U_SUCCESS(status)) { 2053 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2054 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2055 TEST_ASSERT(numFields == 2); 2056 TEST_ASSERT_UTEXT(str_first, fields[0]); 2057 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); 2058 TEST_ASSERT(fields[2] == &patternText); 2059 } 2060 for(i = 0; i < numFields; i++) { 2061 utext_close(fields[i]); 2062 } 2063 2064 2065 /* Split with too few output strings available (3) */ 2066 status = U_ZERO_ERROR; 2067 fields[0] = NULL; 2068 fields[1] = NULL; 2069 fields[2] = NULL; 2070 fields[3] = &patternText; 2071 numFields = uregex_splitUText(re, fields, 3, &status); 2072 TEST_ASSERT_SUCCESS(status); 2073 2074 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2075 if(U_SUCCESS(status)) { 2076 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2077 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2078 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ 2079 TEST_ASSERT(numFields == 3); 2080 TEST_ASSERT_UTEXT(str_first, fields[0]); 2081 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2082 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); 2083 TEST_ASSERT(fields[3] == &patternText); 2084 } 2085 for(i = 0; i < numFields; i++) { 2086 utext_close(fields[i]); 2087 } 2088 2089 /* Split with just enough output strings available (5) */ 2090 status = U_ZERO_ERROR; 2091 fields[0] = NULL; 2092 fields[1] = NULL; 2093 fields[2] = NULL; 2094 fields[3] = NULL; 2095 fields[4] = NULL; 2096 fields[5] = &patternText; 2097 numFields = uregex_splitUText(re, fields, 5, &status); 2098 TEST_ASSERT_SUCCESS(status); 2099 2100 
/* The TEST_ASSERT_SUCCESS call above should change too... */ 2101 if(U_SUCCESS(status)) { 2102 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2103 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2104 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2105 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2106 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */ 2107 2108 TEST_ASSERT(numFields == 5); 2109 TEST_ASSERT_UTEXT(str_first, fields[0]); 2110 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2111 TEST_ASSERT_UTEXT(str_second, fields[2]); 2112 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2113 TEST_ASSERT_UTEXT(str_third, fields[4]); 2114 TEST_ASSERT(fields[5] == &patternText); 2115 } 2116 for(i = 0; i < numFields; i++) { 2117 utext_close(fields[i]); 2118 } 2119 2120 /* Split, end of text is a field delimiter. */ 2121 status = U_ZERO_ERROR; 2122 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status); 2123 TEST_ASSERT_SUCCESS(status); 2124 2125 /* The TEST_ASSERT_SUCCESS call above should change too... */ 2126 if(U_SUCCESS(status)) { 2127 memset(fields, 0, sizeof(fields)); 2128 fields[9] = &patternText; 2129 numFields = uregex_splitUText(re, fields, 9, &status); 2130 TEST_ASSERT_SUCCESS(status); 2131 2132 /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ 2133 if(U_SUCCESS(status)) { 2134 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */ 2135 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ 2136 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ 2137 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ 2138 const char str_empty[] = { 0x00 }; 2139 2140 TEST_ASSERT(numFields == 5); 2141 TEST_ASSERT_UTEXT(str_first, fields[0]); 2142 TEST_ASSERT_UTEXT(str_taga, fields[1]); 2143 TEST_ASSERT_UTEXT(str_second, fields[2]); 2144 TEST_ASSERT_UTEXT(str_tagb, fields[3]); 2145 TEST_ASSERT_UTEXT(str_empty, fields[4]); 2146 TEST_ASSERT(fields[5] == NULL); 2147 TEST_ASSERT(fields[8] == NULL); 2148 TEST_ASSERT(fields[9] == &patternText); 2149 } 2150 for(i = 0; i < numFields; i++) { 2151 utext_close(fields[i]); 2152 } 2153 } 2154 2155 uregex_close(re); 2156 } 2157 utext_close(&patternText); 2158} 2159 2160 2161static void TestRefreshInput(void) { 2162 /* 2163 * RefreshInput changes out the input of a URegularExpression without 2164 * changing anything else in the match state. Used with Java JNI, 2165 * when Java moves the underlying string storage. This test 2166 * runs a find() loop, moving the text after the first match. 2167 * The right number of matches should still be found. 
2168 */ 2169 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */ 2170 UChar movedStr[] = { 0, 0, 0, 0, 0, 0}; 2171 UErrorCode status = U_ZERO_ERROR; 2172 URegularExpression *re; 2173 UText ut1 = UTEXT_INITIALIZER; 2174 UText ut2 = UTEXT_INITIALIZER; 2175 2176 re = uregex_openC("[ABC]", 0, 0, &status); 2177 TEST_ASSERT_SUCCESS(status); 2178 2179 utext_openUChars(&ut1, testStr, -1, &status); 2180 TEST_ASSERT_SUCCESS(status); 2181 uregex_setUText(re, &ut1, &status); 2182 TEST_ASSERT_SUCCESS(status); 2183 2184 /* Find the first match "A" in the original string */ 2185 TEST_ASSERT(uregex_findNext(re, &status)); 2186 TEST_ASSERT(uregex_start(re, 0, &status) == 0); 2187 2188 /* Move the string, kill the original string. */ 2189 u_strcpy(movedStr, testStr); 2190 u_memset(testStr, 0, u_strlen(testStr)); 2191 utext_openUChars(&ut2, movedStr, -1, &status); 2192 TEST_ASSERT_SUCCESS(status); 2193 uregex_refreshUText(re, &ut2, &status); 2194 TEST_ASSERT_SUCCESS(status); 2195 2196 /* Find the following two matches, now working in the moved string. */ 2197 TEST_ASSERT(uregex_findNext(re, &status)); 2198 TEST_ASSERT(uregex_start(re, 0, &status) == 2); 2199 TEST_ASSERT(uregex_findNext(re, &status)); 2200 TEST_ASSERT(uregex_start(re, 0, &status) == 4); 2201 TEST_ASSERT(FALSE == uregex_findNext(re, &status)); 2202 2203 uregex_close(re); 2204} 2205 2206 2207static void TestBug8421(void) { 2208 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched 2209 * was failing. 
2210 */ 2211 URegularExpression *re; 2212 UErrorCode status = U_ZERO_ERROR; 2213 int32_t limit = -1; 2214 2215 re = uregex_openC("abc", 0, 0, &status); 2216 TEST_ASSERT_SUCCESS(status); 2217 2218 limit = uregex_getTimeLimit(re, &status); 2219 TEST_ASSERT_SUCCESS(status); 2220 TEST_ASSERT(limit == 0); 2221 2222 uregex_setTimeLimit(re, 100, &status); 2223 TEST_ASSERT_SUCCESS(status); 2224 limit = uregex_getTimeLimit(re, &status); 2225 TEST_ASSERT_SUCCESS(status); 2226 TEST_ASSERT(limit == 100); 2227 2228 uregex_close(re); 2229} 2230 2231 2232#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ 2233