usettest.c revision f9878a236aa0d9662d8e40cafdaf2e04cd615835
1/* 2********************************************************************** 3* Copyright (c) 2002-2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6*/ 7#include "unicode/uset.h" 8#include "unicode/ustring.h" 9#include "cintltst.h" 10#include "cmemory.h" 11#include <stdlib.h> 12#include <string.h> 13 14#define TEST(x) addTest(root, &x, "uset/" # x) 15 16static void TestAPI(void); 17static void Testj2269(void); 18static void TestSerialized(void); 19static void TestNonInvariantPattern(void); 20static void TestBadPattern(void); 21static void TestFreezable(void); 22static void TestSpan(void); 23 24void addUSetTest(TestNode** root); 25 26static void expect(const USet* set, 27 const char* inList, 28 const char* outList, 29 UErrorCode* ec); 30static void expectContainment(const USet* set, 31 const char* list, 32 UBool isIn); 33static char oneUCharToChar(UChar32 c); 34static void expectItems(const USet* set, 35 const char* items); 36 37void 38addUSetTest(TestNode** root) { 39 TEST(TestAPI); 40 TEST(Testj2269); 41 TEST(TestSerialized); 42 TEST(TestNonInvariantPattern); 43 TEST(TestBadPattern); 44 TEST(TestFreezable); 45 TEST(TestSpan); 46} 47 48/*------------------------------------------------------------------ 49 * Tests 50 *------------------------------------------------------------------*/ 51 52static void Testj2269() { 53 UErrorCode status = U_ZERO_ERROR; 54 UChar a[4] = { 0x61, 0x62, 0x63, 0 }; 55 USet *s = uset_open(1, 0); 56 uset_addString(s, a, 3); 57 a[0] = 0x63; a[1] = 0x63; 58 expect(s, "{abc}", "{ccc}", &status); 59 uset_close(s); 60} 61 62static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */ 63static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1; 64 65static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */ 66static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1; 67 68static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */ 69static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1; 70 71static const UChar STR_bc[] = {98,99,0}; /* "bc" */ 72static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1; 73 74static const UChar STR_ab[] = {97,98,0}; /* "ab" */ 75static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1; 76 77/** 78 * Basic API test for uset.x 79 */ 80static void TestAPI() { 81 USet* set; 82 USet* set2; 83 UErrorCode ec; 84 85 /* [] */ 86 set = uset_openEmpty(); 87 expect(set, "", "abc{ab}", NULL); 88 uset_close(set); 89 90 set = uset_open(1, 0); 91 expect(set, "", "abc{ab}", NULL); 92 uset_close(set); 93 94 set = uset_open(1, 1); 95 uset_clear(set); 96 expect(set, "", "abc{ab}", NULL); 97 uset_close(set); 98 99 /* [ABC] */ 100 set = uset_open(0x0041, 0x0043); 101 expect(set, "ABC", "DEF{ab}", NULL); 102 uset_close(set); 103 104 /* [a-c{ab}] */ 105 ec = U_ZERO_ERROR; 106 set = uset_openPattern(PAT, PAT_LEN, &ec); 107 if(U_FAILURE(ec)) { 108 log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec)); 109 return; 110 } 111 if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) { 112 log_err("uset_resemblesPattern of PAT failed\n"); 113 } 114 expect(set, "abc{ab}", "def{bc}", &ec); 115 116 /* [a-d{ab}] */ 117 uset_add(set, 0x64); 118 expect(set, "abcd{ab}", "ef{bc}", NULL); 119 120 /* [acd{ab}{bc}] */ 121 uset_remove(set, 0x62); 122 uset_addString(set, STR_bc, STR_bc_LEN); 123 expect(set, "acd{ab}{bc}", "bef{cd}", NULL); 124 125 /* [acd{bc}] */ 126 uset_removeString(set, STR_ab, STR_ab_LEN); 127 expect(set, "acd{bc}", "bfg{ab}", NULL); 128 129 /* [^acd{bc}] */ 130 uset_complement(set); 131 expect(set, "bef{bc}", "acd{ac}", NULL); 132 133 /* [a-e{bc}] */ 134 uset_complement(set); 135 uset_addRange(set, 0x0062, 0x0065); 136 expect(set, "abcde{bc}", "fg{ab}", NULL); 137 138 /* [de{bc}] */ 139 uset_removeRange(set, 0x0050, 0x0063); 140 expect(set, "de{bc}", "bcfg{ab}", NULL); 141 142 /* [g-l] */ 143 uset_set(set, 0x0067, 0x006C); 144 expect(set, "ghijkl", "de{bc}", NULL); 145 146 if (uset_indexOf(set, 0x0067) != 0) { 147 log_err("uset_indexOf failed finding correct index of 'g'\n"); 148 } 149 150 if (uset_charAt(set, 0) != 0x0067) { 151 log_err("uset_charAt failed finding correct char 'g' at index 0\n"); 152 } 153 154 /* How to test this one...? */ 155 uset_compact(set); 156 157 /* [g-i] */ 158 uset_retain(set, 0x0067, 0x0069); 159 expect(set, "ghi", "dejkl{bc}", NULL); 160 161 /* UCHAR_ASCII_HEX_DIGIT */ 162 uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec); 163 if(U_FAILURE(ec)) { 164 log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec)); 165 return; 166 } 167 expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL); 168 169 /* [ab] */ 170 uset_clear(set); 171 uset_addAllCodePoints(set, STR_ab, STR_ab_LEN); 172 expect(set, "ab", "def{ab}", NULL); 173 if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){ 174 log_err("set should not conatin all characters of \"bc\" \n"); 175 } 176 177 /* [] */ 178 set2 = uset_open(1, 1); 179 uset_clear(set2); 180 181 /* space */ 182 uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec); 183 expect(set2, " ", "abcdefghi{bc}", NULL); 184 185 /* [a-c] */ 186 uset_set(set2, 0x0061, 0x0063); 187 /* [g-i] */ 188 uset_set(set, 0x0067, 0x0069); 189 190 /* [a-c g-i] */ 191 if (uset_containsSome(set, set2)) { 192 log_err("set should not contain some of set2 yet\n"); 193 } 194 uset_complementAll(set, set2); 195 if (!uset_containsSome(set, set2)) { 196 log_err("set should contain some of set2\n"); 197 } 198 expect(set, "abcghi", "def{bc}", NULL); 199 200 /* [g-i] */ 201 uset_removeAll(set, set2); 202 expect(set, "ghi", "abcdef{bc}", NULL); 203 204 /* [a-c g-i] */ 205 uset_addAll(set2, set); 206 expect(set2, "abcghi", "def{bc}", NULL); 207 208 /* [g-i] */ 209 uset_retainAll(set2, set); 210 expect(set2, "ghi", "abcdef{bc}", NULL); 211 212 uset_close(set); 213 uset_close(set2); 214} 215 216/*------------------------------------------------------------------ 217 * Support 218 *------------------------------------------------------------------*/ 219 220/** 221 * Verifies that the given set contains the characters and strings in 222 * inList, and does not contain those in outList. Also verifies that 223 * 'set' is not NULL and that 'ec' succeeds. 224 * @param set the set to test, or NULL (on error) 225 * @param inList list of set contents, in iteration order. Format is 226 * list of individual strings, in iteration order, followed by sorted 227 * list of strings, delimited by {}. This means we do not test 228 * characters '{' or '}' and we do not test strings containing those 229 * characters either. 230 * @param outList list of things not in the set. Same format as 231 * inList. 232 * @param ec an error code, checked for success. May be NULL in which 233 * case it is ignored. 234 */ 235static void expect(const USet* set, 236 const char* inList, 237 const char* outList, 238 UErrorCode* ec) { 239 if (ec!=NULL && U_FAILURE(*ec)) { 240 log_err("FAIL: %s\n", u_errorName(*ec)); 241 return; 242 } 243 if (set == NULL) { 244 log_err("FAIL: USet is NULL\n"); 245 return; 246 } 247 expectContainment(set, inList, TRUE); 248 expectContainment(set, outList, FALSE); 249 expectItems(set, inList); 250} 251 252static void expectContainment(const USet* set, 253 const char* list, 254 UBool isIn) { 255 const char* p = list; 256 UChar ustr[4096]; 257 char *pat; 258 UErrorCode ec; 259 int32_t rangeStart = -1, rangeEnd = -1, length; 260 261 ec = U_ZERO_ERROR; 262 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec); 263 if(U_FAILURE(ec)) { 264 log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec)); 265 return; 266 } 267 pat=aescstrdup(ustr, length); 268 269 while (*p) { 270 if (*p=='{') { 271 const char* stringStart = ++p; 272 int32_t stringLength = 0; 273 char strCopy[64]; 274 275 while (*p++ != '}') { 276 } 277 stringLength = (int32_t)(p - stringStart - 1); 278 strncpy(strCopy, stringStart, stringLength); 279 strCopy[stringLength] = 0; 280 281 u_charsToUChars(stringStart, ustr, stringLength); 282 283 if (uset_containsString(set, ustr, stringLength) == isIn) { 284 log_verbose("Ok: %s %s \"%s\"\n", pat, 285 (isIn ? "contains" : "does not contain"), 286 strCopy); 287 } else { 288 log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat, 289 (isIn ? "does not contain" : "contains"), 290 strCopy); 291 } 292 } 293 294 else { 295 UChar32 c; 296 297 u_charsToUChars(p, ustr, 1); 298 c = ustr[0]; 299 300 if (uset_contains(set, c) == isIn) { 301 log_verbose("Ok: %s %s '%c'\n", pat, 302 (isIn ? "contains" : "does not contain"), 303 *p); 304 } else { 305 log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat, 306 (isIn ? "does not contain" : "contains"), 307 *p); 308 } 309 310 /* Test the range API too by looking for ranges */ 311 if (c == rangeEnd+1) { 312 rangeEnd = c; 313 } else { 314 if (rangeStart >= 0) { 315 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) { 316 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat, 317 (isIn ? "contains" : "does not contain"), 318 rangeStart, rangeEnd); 319 } else { 320 log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat, 321 (isIn ? "does not contain" : "contains"), 322 rangeStart, rangeEnd); 323 } 324 } 325 rangeStart = rangeEnd = c; 326 } 327 328 ++p; 329 } 330 } 331 332 if (rangeStart >= 0) { 333 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) { 334 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat, 335 (isIn ? "contains" : "does not contain"), 336 rangeStart, rangeEnd); 337 } else { 338 log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat, 339 (isIn ? "does not contain" : "contains"), 340 rangeStart, rangeEnd); 341 } 342 } 343} 344 345/* This only works for invariant BMP chars */ 346static char oneUCharToChar(UChar32 c) { 347 UChar ubuf[1]; 348 char buf[1]; 349 ubuf[0] = (UChar) c; 350 u_UCharsToChars(ubuf, buf, 1); 351 return buf[0]; 352} 353 354static void expectItems(const USet* set, 355 const char* items) { 356 const char* p = items; 357 UChar ustr[4096], itemStr[4096]; 358 char buf[4096]; 359 char *pat; 360 UErrorCode ec; 361 int32_t expectedSize = 0; 362 int32_t itemCount = uset_getItemCount(set); 363 int32_t itemIndex = 0; 364 UChar32 start = 1, end = 0; 365 int32_t itemLen = 0, length; 366 367 ec = U_ZERO_ERROR; 368 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec); 369 if (U_FAILURE(ec)) { 370 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec)); 371 return; 372 } 373 pat=aescstrdup(ustr, length); 374 375 if (uset_isEmpty(set) != (strlen(items)==0)) { 376 log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n", 377 pat, 378 strlen(items)==0 ? "TRUE" : "FALSE"); 379 } 380 381 /* Don't test patterns starting with "[^" */ 382 if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) { 383 return; 384 } 385 386 while (*p) { 387 388 ++expectedSize; 389 390 if (start > end || start == -1) { 391 /* Fetch our next item */ 392 if (itemIndex >= itemCount) { 393 log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat); 394 return; 395 } 396 397 itemLen = uset_getItem(set, itemIndex, &start, &end, 398 itemStr, sizeof(itemStr), &ec); 399 if (U_FAILURE(ec) || itemLen < 0) { 400 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec)); 401 return; 402 } 403 404 if (itemLen == 0) { 405 log_verbose("Ok: %s item %d is %c-%c\n", pat, 406 itemIndex, oneUCharToChar(start), 407 oneUCharToChar(end)); 408 } else { 409 itemStr[itemLen] = 0; 410 u_UCharsToChars(itemStr, buf, itemLen+1); 411 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf); 412 } 413 414 ++itemIndex; 415 } 416 417 if (*p=='{') { 418 const char* stringStart = ++p; 419 int32_t stringLength = 0; 420 char strCopy[64]; 421 422 while (*p++ != '}') { 423 } 424 stringLength = (int32_t)(p - stringStart - 1); 425 strncpy(strCopy, stringStart, stringLength); 426 strCopy[stringLength] = 0; 427 428 u_charsToUChars(stringStart, ustr, stringLength); 429 ustr[stringLength] = 0; 430 431 if (itemLen == 0) { 432 log_err("FAIL: for %s expect \"%s\" next, but got a char\n", 433 pat, strCopy); 434 return; 435 } 436 437 if (u_strcmp(ustr, itemStr) != 0) { 438 log_err("FAIL: for %s expect \"%s\" next\n", 439 pat, strCopy); 440 return; 441 } 442 } 443 444 else { 445 UChar32 c; 446 447 u_charsToUChars(p, ustr, 1); 448 c = ustr[0]; 449 450 if (itemLen != 0) { 451 log_err("FAIL: for %s expect '%c' next, but got a string\n", 452 pat, *p); 453 return; 454 } 455 456 if (c != start++) { 457 log_err("FAIL: for %s expect '%c' next\n", 458 pat, *p); 459 return; 460 } 461 462 ++p; 463 } 464 } 465 466 if (uset_size(set) == expectedSize) { 467 log_verbose("Ok: %s size is %d\n", pat, expectedSize); 468 } else { 469 log_err("FAIL: %s size is %d, expected %d\n", 470 pat, uset_size(set), expectedSize); 471 } 472} 473 474static void 475TestSerialized() { 476 uint16_t buffer[1000]; 477 USerializedSet sset; 478 USet *set; 479 UErrorCode errorCode; 480 UChar32 c; 481 int32_t length; 482 483 /* use a pattern that generates both BMP and supplementary code points */ 484 U_STRING_DECL(pattern, "[:Cf:]", 6); 485 U_STRING_INIT(pattern, "[:Cf:]", 6); 486 487 errorCode=U_ZERO_ERROR; 488 set=uset_openPattern(pattern, -1, &errorCode); 489 if(U_FAILURE(errorCode)) { 490 log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode)); 491 return; 492 } 493 494 length=uset_serialize(set, buffer, UPRV_LENGTHOF(buffer), &errorCode); 495 if(U_FAILURE(errorCode)) { 496 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode)); 497 uset_close(set); 498 return; 499 } 500 501 uset_getSerializedSet(&sset, buffer, length); 502 for(c=0; c<=0x10ffff; ++c) { 503 if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) { 504 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c); 505 break; 506 } 507 } 508 509 uset_close(set); 510} 511 512/** 513 * Make sure that when non-invariant chars are passed to uset_openPattern 514 * they do not cause an ugly failure mode (e.g. assertion failure). 515 * JB#3795. 516 */ 517static void 518TestNonInvariantPattern() { 519 UErrorCode ec = U_ZERO_ERROR; 520 /* The critical part of this test is that the following pattern 521 must contain a non-invariant character. */ 522 static const char *pattern = "[:ccc!=0:]"; 523 UChar buf[256]; 524 int32_t len = u_unescape(pattern, buf, 256); 525 /* This test 'fails' by having an assertion failure within the 526 following call. It passes by running to completion with no 527 assertion failure. */ 528 USet *set = uset_openPattern(buf, len, &ec); 529 uset_close(set); 530} 531 532static void TestBadPattern(void) { 533 UErrorCode status = U_ZERO_ERROR; 534 USet *pat; 535 U_STRING_DECL(pattern, "[", 1); 536 U_STRING_INIT(pattern, "[", 1); 537 pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status); 538 if (pat != NULL || U_SUCCESS(status)) { 539 log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status)); 540 } 541} 542 543static USet *openIDSet() { 544 UErrorCode errorCode = U_ZERO_ERROR; 545 U_STRING_DECL(pattern, "[:ID_Continue:]", 15); 546 U_STRING_INIT(pattern, "[:ID_Continue:]", 15); 547 return uset_openPattern(pattern, 15, &errorCode); 548} 549 550static void TestFreezable() { 551 USet *idSet; 552 USet *frozen; 553 USet *thawed; 554 555 idSet=openIDSet(); 556 557 if (idSet == NULL) { 558 log_data_err("openIDSet() returned NULL. (Are you missing data?)\n"); 559 uset_close(idSet); 560 return; 561 } 562 563 frozen=uset_clone(idSet); 564 565 if (frozen == NULL) { 566 log_err("uset_Clone() returned NULL\n"); 567 return; 568 } 569 570 if(!uset_equals(frozen, idSet)) { 571 log_err("uset_clone() did not make an equal copy\n"); 572 } 573 574 uset_freeze(frozen); 575 uset_addRange(frozen, 0xd802, 0xd805); 576 577 if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) { 578 log_err("uset_freeze() or uset_isFrozen() does not work\n"); 579 } 580 581 thawed=uset_cloneAsThawed(frozen); 582 583 if (thawed == NULL) { 584 log_err("uset_cloneAsThawed(frozen) returned NULL"); 585 uset_close(frozen); 586 uset_close(idSet); 587 return; 588 } 589 590 uset_addRange(thawed, 0xd802, 0xd805); 591 592 if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) { 593 log_err("uset_cloneAsThawed() does not work\n"); 594 } 595 596 uset_close(idSet); 597 uset_close(frozen); 598 uset_close(thawed); 599} 600 601static void TestSpan() { 602 static const UChar s16[2]={ 0xe01, 0x3000 }; 603 static const char* s8="\xE0\xB8\x81\xE3\x80\x80"; 604 605 USet *idSet=openIDSet(); 606 607 if (idSet == NULL) { 608 log_data_err("openIDSet() returned NULL (Are you missing data?)\n"); 609 return; 610 } 611 612 if( 613 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) || 614 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) || 615 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) || 616 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) 617 ) { 618 log_err("uset_span() or uset_spanBack() does not work\n"); 619 } 620 621 if( 622 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) || 623 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) || 624 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) || 625 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) 626 ) { 627 log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n"); 628 } 629 630 uset_freeze(idSet); 631 632 if( 633 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) || 634 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) || 635 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) || 636 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) 637 ) { 638 log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n"); 639 } 640 641 if( 642 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) || 643 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) || 644 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) || 645 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) 646 ) { 647 log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n"); 648 } 649 650 uset_close(idSet); 651} 652 653/*eof*/ 654