1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/******************************************************************** 4 * Copyright (c) 1999-2016, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 ******************************************************************** 7 * Date Name Description 8 * 12/14/99 Madhu Creation. 9 * 01/12/2000 Madhu updated for changed API 10 ********************************************************************/ 11 12#include "unicode/utypes.h" 13 14#if !UCONFIG_NO_BREAK_ITERATION 15 16#include "unicode/uchar.h" 17#include "intltest.h" 18#include "unicode/rbbi.h" 19#include "unicode/schriter.h" 20#include "rbbiapts.h" 21#include "rbbidata.h" 22#include "cstring.h" 23#include "ubrkimpl.h" 24#include "unicode/locid.h" 25#include "unicode/ustring.h" 26#include "unicode/utext.h" 27#include "cmemory.h" 28#if !UCONFIG_NO_BREAK_ITERATION 29#include "unicode/filteredbrk.h" 30#include <stdio.h> // for sprintf 31#endif 32/** 33 * API Test the RuleBasedBreakIterator class 34 */ 35 36 37#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ 38dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} 39 40#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \ 41 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} 42 43void RBBIAPITest::TestCloneEquals() 44{ 45 46 UErrorCode status=U_ZERO_ERROR; 47 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 48 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 49 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 50 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 51 if(U_FAILURE(status)){ 52 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 53 return; 54 } 55 56 57 UnicodeString testString="Testing word break iterators's clone() and equals()"; 58 bi1->setText(testString); 59 bi2->setText(testString); 60 biequal->setText(testString); 61 62 bi3->setText("hello"); 63 64 logln((UnicodeString)"Testing equals()"); 65 66 logln((UnicodeString)"Testing == and !="); 67 UBool b = (*bi1 != *biequal); 68 b |= *bi1 == *bi2; 69 b |= *bi1 == *bi3; 70 if (b) { 71 errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__); 72 } 73 74 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) 75 errln("%s:%d ERROR:2 RBBI's == and != operator failed.", __FILE__, __LINE__); 76 77 78 // Quick test of RulesBasedBreakIterator assignment - 79 // Check that 80 // two different iterators are != 81 // they are == after assignment 82 // source and dest iterator produce the same next() after assignment. 83 // deleting one doesn't disable the other. 84 logln("Testing assignment"); 85 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 86 if(U_FAILURE(status)){ 87 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 88 return; 89 } 90 91 RuleBasedBreakIterator biDefault, biDefault2; 92 if(U_FAILURE(status)){ 93 errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__); 94 return; 95 } 96 if (biDefault == *bix) { 97 errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__); 98 return; 99 } 100 if (biDefault != biDefault2) { 101 errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__); 102 return; 103 } 104 105 106 UnicodeString HelloString("Hello Kitty"); 107 bix->setText(HelloString); 108 if (*bix == *bi2) { 109 errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__); 110 } 111 *bix = *bi2; 112 if (*bix != *bi2) { 113 errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__); 114 } 115 116 int bixnext = bix->next(); 117 int bi2next = bi2->next(); 118 if (! (bixnext == bi2next && bixnext == 7)) { 119 errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__); 120 } 121 delete bix; 122 if (bi2->next() != 8) { 123 errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__); 124 } 125 126 127 128 logln((UnicodeString)"Testing clone()"); 129 RuleBasedBreakIterator* bi1clone = dynamic_cast<RuleBasedBreakIterator *>(bi1->clone()); 130 RuleBasedBreakIterator* bi2clone = dynamic_cast<RuleBasedBreakIterator *>(bi2->clone()); 131 132 if(*bi1clone != *bi1 || *bi1clone != *biequal || 133 *bi1clone == *bi3 || *bi1clone == *bi2) 134 errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__); 135 136 if(*bi2clone == *bi1 || *bi2clone == *biequal || 137 *bi2clone == *bi3 || *bi2clone != *bi2) 138 errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__); 139 140 if(bi1->getText() != bi1clone->getText() || 141 bi2clone->getText() != bi2->getText() || 142 *bi2clone == *bi1clone ) 143 errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__); 144 145 delete bi1clone; 146 delete bi2clone; 147 delete bi1; 148 delete bi3; 149 delete bi2; 150 delete biequal; 151} 152 153void RBBIAPITest::TestBoilerPlate() 154{ 155 UErrorCode status = U_ZERO_ERROR; 156 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); 157 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); 158 if (U_FAILURE(status)) { 159 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 160 return; 161 } 162 if(*a!=*b){ 163 errln("Failed: boilerplate method operator!= does not return correct results"); 164 } 165 // Japanese word break iterators are identical to root with 166 // a dictionary-based break iterator 167 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status); 168 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status); 169 if(c && d){ 170 if(*c!=*d){ 171 errln("Failed: boilerplate method operator== does not return correct results"); 172 } 173 }else{ 174 errln("creation of break iterator failed"); 175 } 176 delete a; 177 delete b; 178 delete c; 179 delete d; 180} 181 182void RBBIAPITest::TestgetRules() 183{ 184 UErrorCode status=U_ZERO_ERROR; 185 186 LocalPointer<RuleBasedBreakIterator> bi1( 187 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status); 188 LocalPointer<RuleBasedBreakIterator> bi2( 189 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status); 190 if(U_FAILURE(status)){ 191 errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status)); 192 return; 193 } 194 195 logln((UnicodeString)"Testing getRules()"); 196 197 UnicodeString text(u"Hello there"); 198 bi1->setText(text); 199 200 LocalPointer <RuleBasedBreakIterator> bi3((RuleBasedBreakIterator*)bi1->clone()); 201 202 UnicodeString temp=bi1->getRules(); 203 UnicodeString temp2=bi2->getRules(); 204 UnicodeString temp3=bi3->getRules(); 205 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) 206 errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__); 207 208 RuleBasedBreakIterator bi4; // Default RuleBasedBreakIterator constructor gives empty shell with empty rules. 209 if (!bi4.getRules().isEmpty()) { 210 errln("%s:%d Empty string expected.", __FILE__, __LINE__); 211 } 212} 213 214void RBBIAPITest::TestHashCode() 215{ 216 UErrorCode status=U_ZERO_ERROR; 217 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 218 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 219 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 220 if(U_FAILURE(status)){ 221 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 222 delete bi1; 223 delete bi2; 224 delete bi3; 225 return; 226 } 227 228 229 logln((UnicodeString)"Testing hashCode()"); 230 231 bi1->setText((UnicodeString)"Hash code"); 232 bi2->setText((UnicodeString)"Hash code"); 233 bi3->setText((UnicodeString)"Hash code"); 234 235 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); 236 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); 237 238 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || 239 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) 240 errln((UnicodeString)"ERROR: identical objects have different hashcodes"); 241 242 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || 243 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) 244 errln((UnicodeString)"ERROR: different objects have same hashcodes"); 245 246 delete bi1clone; 247 delete bi2clone; 248 delete bi1; 249 delete bi2; 250 delete bi3; 251 252} 253void RBBIAPITest::TestGetSetAdoptText() 254{ 255 logln((UnicodeString)"Testing getText setText "); 256 IcuTestErrorCode status(*this, "TestGetSetAdoptText"); 257 UnicodeString str1="first string."; 258 UnicodeString str2="Second string."; 259 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); 260 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); 261 if(status.isFailure()){ 262 errcheckln(status, "Fail : in construction - %s", status.errorName()); 263 return; 264 } 265 266 267 CharacterIterator* text1= new StringCharacterIterator(str1); 268 CharacterIterator* text1Clone = text1->clone(); 269 CharacterIterator* text2= new StringCharacterIterator(str2); 270 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" 271 272 wordIter1->setText(str1); 273 CharacterIterator *tci = &wordIter1->getText(); 274 UnicodeString tstr; 275 tci->getText(tstr); 276 TEST_ASSERT(tstr == str1); 277 if(wordIter1->current() != 0) 278 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 279 280 wordIter1->next(2); 281 282 wordIter1->setText(str2); 283 if(wordIter1->current() != 0) 284 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 285 286 287 charIter1->adoptText(text1Clone); 288 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); 289 tci = &wordIter1->getText(); 290 tci->getText(tstr); 291 TEST_ASSERT(tstr == str2); 292 tci = &charIter1->getText(); 293 tci->getText(tstr); 294 TEST_ASSERT(tstr == str1); 295 296 297 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone()); 298 rb->adoptText(text1); 299 if(rb->getText() != *text1) 300 errln((UnicodeString)"ERROR:1 error in adoptText "); 301 rb->adoptText(text2); 302 if(rb->getText() != *text2) 303 errln((UnicodeString)"ERROR:2 error in adoptText "); 304 305 // Adopt where iterator range is less than the entire orignal source string. 306 // (With the change of the break engine to working with UText internally, 307 // CharacterIterators starting at positions other than zero are not supported) 308 rb->adoptText(text3); 309 TEST_ASSERT(rb->preceding(2) == 0); 310 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); 311 //if(rb->preceding(2) != 3) { 312 // errln((UnicodeString)"ERROR:3 error in adoptText "); 313 //} 314 //if(rb->following(11) != BreakIterator::DONE) { 315 // errln((UnicodeString)"ERROR:4 error in adoptText "); 316 //} 317 318 // UText API 319 // 320 // Quick test to see if UText is working at all. 321 // 322 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ 323 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ 324 // 012345678901 325 326 status.reset(); 327 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); 328 wordIter1->setText(ut.getAlias(), status); 329 TEST_ASSERT_SUCCESS(status); 330 331 int32_t pos; 332 pos = wordIter1->first(); 333 TEST_ASSERT(pos==0); 334 pos = wordIter1->next(); 335 TEST_ASSERT(pos==5); 336 pos = wordIter1->next(); 337 TEST_ASSERT(pos==6); 338 pos = wordIter1->next(); 339 TEST_ASSERT(pos==11); 340 pos = wordIter1->next(); 341 TEST_ASSERT(pos==UBRK_DONE); 342 343 status.reset(); 344 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); 345 TEST_ASSERT_SUCCESS(status); 346 wordIter1->setText(ut2.getAlias(), status); 347 TEST_ASSERT_SUCCESS(status); 348 349 pos = wordIter1->first(); 350 TEST_ASSERT(pos==0); 351 pos = wordIter1->next(); 352 TEST_ASSERT(pos==3); 353 pos = wordIter1->next(); 354 TEST_ASSERT(pos==4); 355 356 pos = wordIter1->last(); 357 TEST_ASSERT(pos==6); 358 pos = wordIter1->previous(); 359 TEST_ASSERT(pos==4); 360 pos = wordIter1->previous(); 361 TEST_ASSERT(pos==3); 362 pos = wordIter1->previous(); 363 TEST_ASSERT(pos==0); 364 pos = wordIter1->previous(); 365 TEST_ASSERT(pos==UBRK_DONE); 366 367 status.reset(); 368 UnicodeString sEmpty; 369 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); 370 wordIter1->getUText(gut2.getAlias(), status); 371 TEST_ASSERT_SUCCESS(status); 372 status.reset(); 373} 374 375 376void RBBIAPITest::TestIteration() 377{ 378 // This test just verifies that the API is present. 379 // Testing for correct operation of the break rules happens elsewhere. 380 381 UErrorCode status=U_ZERO_ERROR; 382 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 383 if (U_FAILURE(status) || bi == NULL) { 384 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 385 } 386 delete bi; 387 388 status=U_ZERO_ERROR; 389 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 390 if (U_FAILURE(status) || bi == NULL) { 391 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); 392 } 393 delete bi; 394 395 status=U_ZERO_ERROR; 396 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); 397 if (U_FAILURE(status) || bi == NULL) { 398 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); 399 } 400 delete bi; 401 402 status=U_ZERO_ERROR; 403 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); 404 if (U_FAILURE(status) || bi == NULL) { 405 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); 406 } 407 delete bi; 408 409 status=U_ZERO_ERROR; 410 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); 411 if (U_FAILURE(status) || bi == NULL) { 412 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); 413 } 414 delete bi; 415 416 status=U_ZERO_ERROR; 417 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 418 if (U_FAILURE(status) || bi == NULL) { 419 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 420 return; // Skip the rest of these tests. 421 } 422 423 424 UnicodeString testString="0123456789"; 425 bi->setText(testString); 426 427 int32_t i; 428 i = bi->first(); 429 if (i != 0) { 430 errln("%s:%d Incorrect value from bi->first(). Expected 0, got %d.", __FILE__, __LINE__, i); 431 } 432 433 i = bi->last(); 434 if (i != 10) { 435 errln("%s:%d Incorrect value from bi->last(). Expected 10, got %d", __FILE__, __LINE__, i); 436 } 437 438 // 439 // Previous 440 // 441 bi->last(); 442 i = bi->previous(); 443 if (i != 9) { 444 errln("%s:%d Incorrect value from bi->last(). Expected 9, got %d", __FILE__, __LINE__, i); 445 } 446 447 448 bi->first(); 449 i = bi->previous(); 450 if (i != BreakIterator::DONE) { 451 errln("%s:%d Incorrect value from bi->previous(). Expected DONE, got %d", __FILE__, __LINE__, i); 452 } 453 454 // 455 // next() 456 // 457 bi->first(); 458 i = bi->next(); 459 if (i != 1) { 460 errln("%s:%d Incorrect value from bi->next(). Expected 1, got %d", __FILE__, __LINE__, i); 461 } 462 463 bi->last(); 464 i = bi->next(); 465 if (i != BreakIterator::DONE) { 466 errln("%s:%d Incorrect value from bi->next(). Expected DONE, got %d", __FILE__, __LINE__, i); 467 } 468 469 470 // 471 // current() 472 // 473 bi->first(); 474 i = bi->current(); 475 if (i != 0) { 476 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i); 477 } 478 479 bi->next(); 480 i = bi->current(); 481 if (i != 1) { 482 errln("%s:%d Incorrect value from bi->current(). Expected 1, got %d", __FILE__, __LINE__, i); 483 } 484 485 bi->last(); 486 bi->next(); 487 i = bi->current(); 488 if (i != 10) { 489 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i); 490 } 491 492 bi->first(); 493 bi->previous(); 494 i = bi->current(); 495 if (i != 0) { 496 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i); 497 } 498 499 500 // 501 // Following() 502 // 503 i = bi->following(4); 504 if (i != 5) { 505 errln("%s:%d Incorrect value from bi->following(). Expected 5, got %d", __FILE__, __LINE__, i); 506 } 507 508 i = bi->following(9); 509 if (i != 10) { 510 errln("%s:%d Incorrect value from bi->following(). Expected 10, got %d", __FILE__, __LINE__, i); 511 } 512 513 i = bi->following(10); 514 if (i != BreakIterator::DONE) { 515 errln("%s:%d Incorrect value from bi->following(). Expected DONE, got %d", __FILE__, __LINE__, i); 516 } 517 518 519 // 520 // Preceding 521 // 522 i = bi->preceding(4); 523 if (i != 3) { 524 errln("%s:%d Incorrect value from bi->preceding(). Expected 3, got %d", __FILE__, __LINE__, i); 525 } 526 527 i = bi->preceding(10); 528 if (i != 9) { 529 errln("%s:%d Incorrect value from bi->preceding(). Expected 9, got %d", __FILE__, __LINE__, i); 530 } 531 532 i = bi->preceding(1); 533 if (i != 0) { 534 errln("%s:%d Incorrect value from bi->preceding(). Expected 0, got %d", __FILE__, __LINE__, i); 535 } 536 537 i = bi->preceding(0); 538 if (i != BreakIterator::DONE) { 539 errln("%s:%d Incorrect value from bi->preceding(). Expected DONE, got %d", __FILE__, __LINE__, i); 540 } 541 542 543 // 544 // isBoundary() 545 // 546 bi->first(); 547 if (bi->isBoundary(3) != TRUE) { 548 errln("%s:%d Incorrect value from bi->isBoudary(). Expected TRUE, got FALSE", __FILE__, __LINE__, i); 549 } 550 i = bi->current(); 551 if (i != 3) { 552 errln("%s:%d Incorrect value from bi->current(). Expected 3, got %d", __FILE__, __LINE__, i); 553 } 554 555 556 if (bi->isBoundary(11) != FALSE) { 557 errln("%s:%d Incorrect value from bi->isBoudary(). Expected FALSE, got TRUE", __FILE__, __LINE__, i); 558 } 559 i = bi->current(); 560 if (i != 10) { 561 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i); 562 } 563 564 // 565 // next(n) 566 // 567 bi->first(); 568 i = bi->next(4); 569 if (i != 4) { 570 errln("%s:%d Incorrect value from bi->next(). Expected 4, got %d", __FILE__, __LINE__, i); 571 } 572 573 i = bi->next(6); 574 if (i != 10) { 575 errln("%s:%d Incorrect value from bi->next(). Expected 10, got %d", __FILE__, __LINE__, i); 576 } 577 578 bi->first(); 579 i = bi->next(11); 580 if (i != BreakIterator::DONE) { 581 errln("%s:%d Incorrect value from bi->next(). Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i); 582 } 583 584 delete bi; 585 586} 587 588 589 590 591 592 593void RBBIAPITest::TestBuilder() { 594 UnicodeString rulesString1 = "$Letters = [:L:];\n" 595 "$Numbers = [:N:];\n" 596 "$Letters+;\n" 597 "$Numbers+;\n" 598 "[^$Letters $Numbers];\n" 599 "!.*;\n"; 600 UnicodeString testString1 = "abc123..abc"; 601 // 01234567890 602 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; 603 UErrorCode status=U_ZERO_ERROR; 604 UParseError parseError; 605 606 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 607 if(U_FAILURE(status)) { 608 dataerrln("Fail : in construction - %s", u_errorName(status)); 609 } else { 610 bi->setText(testString1); 611 doBoundaryTest(*bi, testString1, bounds1); 612 } 613 delete bi; 614} 615 616 617// 618// TestQuoteGrouping 619// Single quotes within rules imply a grouping, so that a modifier 620// following the quoted text (* or +) applies to all of the quoted chars. 621// 622void RBBIAPITest::TestQuoteGrouping() { 623 UnicodeString rulesString1 = "#Here comes the rule...\n" 624 "'$@!'*;\n" // (\$\@\!)* 625 ".;\n"; 626 627 UnicodeString testString1 = "$@!$@!X$@!!X"; 628 // 0123456789012 629 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; 630 UErrorCode status=U_ZERO_ERROR; 631 UParseError parseError; 632 633 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 634 if(U_FAILURE(status)) { 635 dataerrln("Fail : in construction - %s", u_errorName(status)); 636 } else { 637 bi->setText(testString1); 638 doBoundaryTest(*bi, testString1, bounds1); 639 } 640 delete bi; 641} 642 643// 644// TestRuleStatus 645// Test word break rule status constants. 646// 647void RBBIAPITest::TestRuleStatus() { 648 UChar str[30]; 649 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing 650 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO 651 u_unescape("plain word 123.45 \\u30a1\\u30a2 ", 652 // 012345678901234567 8 9 0 653 // Katakana 654 str, 30); 655 UnicodeString testString1(str); 656 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; 657 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, 658 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, 659 UBRK_WORD_IDEO, UBRK_WORD_NONE}; 660 661 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, 662 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, 663 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; 664 665 UErrorCode status=U_ZERO_ERROR; 666 667 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status); 668 if(U_FAILURE(status)) { 669 errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status)); 670 } else { 671 bi->setText(testString1); 672 // First test that the breaks are in the right spots. 673 doBoundaryTest(*bi, testString1, bounds1); 674 675 // Then go back and check tag values 676 int32_t i = 0; 677 int32_t pos, tag; 678 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { 679 if (pos != bounds1[i]) { 680 errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__, __LINE__, pos); 681 break; 682 } 683 tag = bi->getRuleStatus(); 684 if (tag < tag_lo[i] || tag >= tag_hi[i]) { 685 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos); 686 break; 687 } 688 689 // Check that we get the same tag values from getRuleStatusVec() 690 int32_t vec[10]; 691 int t = bi->getRuleStatusVec(vec, 10, status); 692 TEST_ASSERT_SUCCESS(status); 693 TEST_ASSERT(t==1); 694 TEST_ASSERT(vec[0] == tag); 695 } 696 } 697 delete bi; 698 699 // Now test line break status. This test mostly is to confirm that the status constants 700 // are correctly declared in the header. 701 testString1 = "test line. \n"; 702 // break type s s h 703 704 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status); 705 if(U_FAILURE(status)) { 706 errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status)); 707 } else { 708 int32_t i = 0; 709 int32_t pos, tag; 710 UBool success; 711 712 bi->setText(testString1); 713 pos = bi->current(); 714 tag = bi->getRuleStatus(); 715 for (i=0; i<3; i++) { 716 switch (i) { 717 case 0: 718 success = pos==0 && tag==UBRK_LINE_SOFT; break; 719 case 1: 720 success = pos==5 && tag==UBRK_LINE_SOFT; break; 721 case 2: 722 success = pos==12 && tag==UBRK_LINE_HARD; break; 723 default: 724 success = FALSE; break; 725 } 726 if (success == FALSE) { 727 errln("%s:%d: incorrect line break status or position. i=%d, pos=%d, tag=%d", 728 __FILE__, __LINE__, i, pos, tag); 729 break; 730 } 731 pos = bi->next(); 732 tag = bi->getRuleStatus(); 733 } 734 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || 735 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || 736 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { 737 errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__); 738 } 739 } 740 delete bi; 741 742} 743 744 745// 746// TestRuleStatusVec 747// Test the vector form of break rule status. 748// 749void RBBIAPITest::TestRuleStatusVec() { 750 UnicodeString rulesString( "[A-N]{100}; \n" 751 "[a-w]{200}; \n" 752 "[\\p{L}]{300}; \n" 753 "[\\p{N}]{400}; \n" 754 "[0-5]{500}; \n" 755 "!.*;\n", -1, US_INV); 756 UnicodeString testString1 = "Aapz5?"; 757 int32_t statusVals[10]; 758 int32_t numStatuses; 759 int32_t pos; 760 761 UErrorCode status=U_ZERO_ERROR; 762 UParseError parseError; 763 764 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); 765 if (U_FAILURE(status)) { 766 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); 767 } else { 768 bi->setText(testString1); 769 770 // A 771 pos = bi->next(); 772 TEST_ASSERT(pos==1); 773 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 774 TEST_ASSERT_SUCCESS(status); 775 TEST_ASSERT(numStatuses == 2); 776 TEST_ASSERT(statusVals[0] == 100); 777 TEST_ASSERT(statusVals[1] == 300); 778 779 // a 780 pos = bi->next(); 781 TEST_ASSERT(pos==2); 782 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 783 TEST_ASSERT_SUCCESS(status); 784 TEST_ASSERT(numStatuses == 2); 785 TEST_ASSERT(statusVals[0] == 200); 786 TEST_ASSERT(statusVals[1] == 300); 787 788 // p 789 pos = bi->next(); 790 TEST_ASSERT(pos==3); 791 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 792 TEST_ASSERT_SUCCESS(status); 793 TEST_ASSERT(numStatuses == 2); 794 TEST_ASSERT(statusVals[0] == 200); 795 TEST_ASSERT(statusVals[1] == 300); 796 797 // z 798 pos = bi->next(); 799 TEST_ASSERT(pos==4); 800 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 801 TEST_ASSERT_SUCCESS(status); 802 TEST_ASSERT(numStatuses == 1); 803 TEST_ASSERT(statusVals[0] == 300); 804 805 // 5 806 pos = bi->next(); 807 TEST_ASSERT(pos==5); 808 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 809 TEST_ASSERT_SUCCESS(status); 810 TEST_ASSERT(numStatuses == 2); 811 TEST_ASSERT(statusVals[0] == 400); 812 TEST_ASSERT(statusVals[1] == 500); 813 814 // ? 815 pos = bi->next(); 816 TEST_ASSERT(pos==6); 817 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 818 TEST_ASSERT_SUCCESS(status); 819 TEST_ASSERT(numStatuses == 1); 820 TEST_ASSERT(statusVals[0] == 0); 821 822 // 823 // Check buffer overflow error handling. Char == A 824 // 825 bi->first(); 826 pos = bi->next(); 827 TEST_ASSERT(pos==1); 828 memset(statusVals, -1, sizeof(statusVals)); 829 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); 830 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 831 TEST_ASSERT(numStatuses == 2); 832 TEST_ASSERT(statusVals[0] == -1); 833 834 status = U_ZERO_ERROR; 835 memset(statusVals, -1, sizeof(statusVals)); 836 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); 837 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 838 TEST_ASSERT(numStatuses == 2); 839 TEST_ASSERT(statusVals[0] == 100); 840 TEST_ASSERT(statusVals[1] == -1); 841 842 status = U_ZERO_ERROR; 843 memset(statusVals, -1, sizeof(statusVals)); 844 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); 845 TEST_ASSERT_SUCCESS(status); 846 TEST_ASSERT(numStatuses == 2); 847 TEST_ASSERT(statusVals[0] == 100); 848 TEST_ASSERT(statusVals[1] == 300); 849 TEST_ASSERT(statusVals[2] == -1); 850 } 851 delete bi; 852 853} 854 855// 856// Bug 2190 Regression test. Builder crash on rule consisting of only a 857// $variable reference 858void RBBIAPITest::TestBug2190() { 859 UnicodeString rulesString1 = "$aaa = abcd;\n" 860 "$bbb = $aaa;\n" 861 "$bbb;\n"; 862 UnicodeString testString1 = "abcdabcd"; 863 // 01234567890 864 int32_t bounds1[] = {0, 4, 8}; 865 UErrorCode status=U_ZERO_ERROR; 866 UParseError parseError; 867 868 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 869 if(U_FAILURE(status)) { 870 dataerrln("Fail : in construction - %s", u_errorName(status)); 871 } else { 872 bi->setText(testString1); 873 doBoundaryTest(*bi, testString1, bounds1); 874 } 875 delete bi; 876} 877 878 879void RBBIAPITest::TestRegistration() { 880#if !UCONFIG_NO_SERVICE 881 UErrorCode status = U_ZERO_ERROR; 882 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); 883 // ok to not delete these if we exit because of error? 884 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); 885 BreakIterator* root_word = BreakIterator::createWordInstance("", status); 886 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); 887 888 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { 889 dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); 890 891 delete ja_word; 892 delete ja_char; 893 delete root_word; 894 delete root_char; 895 896 return; 897 } 898 899 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); 900 { 901#if 0 // With a dictionary based word breaking, ja_word is identical to root. 902 if (ja_word && *ja_word == *root_word) { 903 errln("japan not different from root"); 904 } 905#endif 906 } 907 908 { 909 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); 910 UBool fail = TRUE; 911 if(result){ 912 fail = *result != *ja_word; 913 } 914 delete result; 915 if (fail) { 916 errln("bad result for xx_XX/word"); 917 } 918 } 919 920 { 921 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); 922 UBool fail = TRUE; 923 if(result){ 924 fail = *result != *ja_char; 925 } 926 delete result; 927 if (fail) { 928 errln("bad result for ja_JP/char"); 929 } 930 } 931 932 { 933 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); 934 UBool fail = TRUE; 935 if(result){ 936 fail = *result != *root_char; 937 } 938 delete result; 939 if (fail) { 940 errln("bad result for xx_XX/char"); 941 } 942 } 943 944 { 945 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 946 UBool found = FALSE; 947 const UnicodeString* p; 948 while ((p = avail->snext(status))) { 949 if (p->compare("xx") == 0) { 950 found = TRUE; 951 break; 952 } 953 } 954 delete avail; 955 if (!found) { 956 errln("did not find test locale"); 957 } 958 } 959 960 { 961 UBool unreg = BreakIterator::unregister(key, status); 962 if (!unreg) { 963 errln("unable to unregister"); 964 } 965 } 966 967 { 968 BreakIterator* result = BreakIterator::createWordInstance("en_US", status); 969 BreakIterator* root = BreakIterator::createWordInstance("", status); 970 UBool fail = TRUE; 971 if(root){ 972 fail = *root != *result; 973 } 974 delete root; 975 delete result; 976 if (fail) { 977 errln("did not get root break"); 978 } 979 } 980 981 { 982 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 983 UBool found = FALSE; 984 const UnicodeString* p; 985 while ((p = avail->snext(status))) { 986 if (p->compare("xx") == 0) { 987 found = TRUE; 988 break; 989 } 990 } 991 delete avail; 992 if (found) { 993 errln("found test locale"); 994 } 995 } 996 997 { 998 int32_t count; 999 UBool foundLocale = FALSE; 1000 const Locale *avail = BreakIterator::getAvailableLocales(count); 1001 for (int i=0; i<count; i++) { 1002 if (avail[i] == Locale::getEnglish()) { 1003 foundLocale = TRUE; 1004 break; 1005 } 1006 } 1007 if (foundLocale == FALSE) { 1008 errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); 1009 } 1010 } 1011 1012 1013 // ja_word was adopted by factory 1014 delete ja_char; 1015 delete root_word; 1016 delete root_char; 1017#endif 1018} 1019 1020void RBBIAPITest::RoundtripRule(const char *dataFile) { 1021 UErrorCode status = U_ZERO_ERROR; 1022 UParseError parseError; 1023 parseError.line = 0; 1024 parseError.offset = 0; 1025 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); 1026 uint32_t length; 1027 const UChar *builtSource; 1028 const uint8_t *rbbiRules; 1029 const uint8_t *builtRules; 1030 1031 if (U_FAILURE(status)) { 1032 errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status)); 1033 return; 1034 } 1035 1036 builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); 1037 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); 1038 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); 1039 if (U_FAILURE(status)) { 1040 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", 1041 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset); 1042 errln(UnicodeString(builtSource)); 1043 return; 1044 }; 1045 rbbiRules = brkItr->getBinaryRules(length); 1046 logln("Comparing \"%s\" len=%d", dataFile, length); 1047 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { 1048 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile); 1049 return; 1050 } 1051 delete brkItr; 1052} 1053 1054void RBBIAPITest::TestRoundtripRules() { 1055 RoundtripRule("word"); 1056 RoundtripRule("title"); 1057 RoundtripRule("sent"); 1058 RoundtripRule("line"); 1059 RoundtripRule("char"); 1060 if (!quick) { 1061 RoundtripRule("word_POSIX"); 1062 } 1063} 1064 1065 1066// Check getBinaryRules() and construction of a break iterator from those rules. 1067 1068void RBBIAPITest::TestGetBinaryRules() { 1069 UErrorCode status=U_ZERO_ERROR; 1070 LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status)); 1071 if (U_FAILURE(status)) { 1072 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status)); 1073 return; 1074 } 1075 RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias()); 1076 if (rbbi == NULL) { 1077 dataerrln("FAIL: RuleBasedBreakIterator is NULL"); 1078 return; 1079 } 1080 1081 // Check that the new line break iterator is nominally functional. 1082 UnicodeString helloWorld("Hello, World!"); 1083 rbbi->setText(helloWorld); 1084 int n = 0; 1085 while (bi->next() != UBRK_DONE) { 1086 ++n; 1087 } 1088 TEST_ASSERT(n == 2); 1089 1090 // Extract the binary rules as a uint8_t blob. 1091 uint32_t ruleLength; 1092 const uint8_t *binRules = rbbi->getBinaryRules(ruleLength); 1093 TEST_ASSERT(ruleLength > 0); 1094 TEST_ASSERT(binRules != NULL); 1095 1096 // Clone the binary rules, and create a break iterator from that. 1097 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator. 1098 uint8_t *clonedRules = new uint8_t[ruleLength]; 1099 memcpy(clonedRules, binRules, ruleLength); 1100 RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status); 1101 TEST_ASSERT_SUCCESS(status); 1102 1103 // Check that the cloned line break iterator is nominally alive. 1104 clonedBI.setText(helloWorld); 1105 n = 0; 1106 while (clonedBI.next() != UBRK_DONE) { 1107 ++n; 1108 } 1109 TEST_ASSERT(n == 2); 1110 1111 delete[] clonedRules; 1112} 1113 1114 1115void RBBIAPITest::TestRefreshInputText() { 1116 /* 1117 * RefreshInput changes out the input of a Break Iterator without 1118 * changing anything else in the iterator's state. Used with Java JNI, 1119 * when Java moves the underlying string storage. This test 1120 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. 1121 * The right set of boundaries should still be found. 1122 */ 1123 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ 1124 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; 1125 UErrorCode status = U_ZERO_ERROR; 1126 UText ut1 = UTEXT_INITIALIZER; 1127 UText ut2 = UTEXT_INITIALIZER; 1128 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); 1129 TEST_ASSERT_SUCCESS(status); 1130 1131 utext_openUChars(&ut1, testStr, -1, &status); 1132 TEST_ASSERT_SUCCESS(status); 1133 1134 if (U_SUCCESS(status)) { 1135 bi->setText(&ut1, status); 1136 TEST_ASSERT_SUCCESS(status); 1137 1138 /* Line boundaries will occur before each letter in the original string */ 1139 TEST_ASSERT(1 == bi->next()); 1140 TEST_ASSERT(3 == bi->next()); 1141 1142 /* Move the string, kill the original string. */ 1143 u_strcpy(movedStr, testStr); 1144 u_memset(testStr, 0x20, u_strlen(testStr)); 1145 utext_openUChars(&ut2, movedStr, -1, &status); 1146 TEST_ASSERT_SUCCESS(status); 1147 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status); 1148 TEST_ASSERT_SUCCESS(status); 1149 TEST_ASSERT(bi == returnedBI); 1150 1151 /* Find the following matches, now working in the moved string. */ 1152 TEST_ASSERT(5 == bi->next()); 1153 TEST_ASSERT(7 == bi->next()); 1154 TEST_ASSERT(8 == bi->next()); 1155 TEST_ASSERT(UBRK_DONE == bi->next()); 1156 1157 utext_close(&ut1); 1158 utext_close(&ut2); 1159 } 1160 delete bi; 1161 1162} 1163 1164#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION 1165static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) { 1166 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets 1167 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'")); 1168 1169 int32_t *pos = new int32_t[ustr.length()]; 1170 int32_t posCount = 0; 1171 1172 // calculate breaks up front, so we can print out 1173 // sans any debugging 1174 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) { 1175 pos[posCount++] = n; 1176 if(posCount>=ustr.length()) { 1177 it.errln("brk count exceeds string length!"); 1178 return; 1179 } 1180 } 1181 UnicodeString out; 1182 out.append((UChar)CHSTR); 1183 int32_t prev = 0; 1184 for(int32_t i=0;i<posCount;i++) { 1185 int32_t n=pos[i]; 1186 out.append(ustr.tempSubString(prev,n-prev)); 1187 out.append((UChar)PILCROW); 1188 prev=n; 1189 } 1190 out.append(ustr.tempSubString(prev,ustr.length()-prev)); 1191 out.append((UChar)CHEND); 1192 it.logln(out); 1193 1194 out.remove(); 1195 for(int32_t i=0;i<posCount;i++) { 1196 char tmp[100]; 1197 sprintf(tmp,"%d ",pos[i]); 1198 out.append(UnicodeString(tmp)); 1199 } 1200 it.logln(out); 1201 delete [] pos; 1202} 1203#endif 1204 1205void RBBIAPITest::TestFilteredBreakIteratorBuilder() { 1206#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION 1207 UErrorCode status = U_ZERO_ERROR; 1208 LocalPointer<FilteredBreakIteratorBuilder> builder; 1209 LocalPointer<BreakIterator> baseBI; 1210 LocalPointer<BreakIterator> filteredBI; 1211 LocalPointer<BreakIterator> frenchBI; 1212 1213 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited. 1214 const UnicodeString ABBR_MR("Mr."); 1215 const UnicodeString ABBR_CAPT("Capt."); 1216 1217 { 1218 logln("Constructing empty builder\n"); 1219 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1220 TEST_ASSERT_SUCCESS(status); 1221 1222 logln("Constructing base BI\n"); 1223 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1224 TEST_ASSERT_SUCCESS(status); 1225 1226 logln("Building new BI\n"); 1227 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1228 TEST_ASSERT_SUCCESS(status); 1229 1230 if (U_SUCCESS(status)) { 1231 logln("Testing:"); 1232 filteredBI->setText(text); 1233 TEST_ASSERT(20 == filteredBI->next()); // Mr. 1234 TEST_ASSERT(84 == filteredBI->next()); // recovered. 1235 TEST_ASSERT(90 == filteredBI->next()); // Capt. 1236 TEST_ASSERT(181 == filteredBI->next()); // Mr. 1237 TEST_ASSERT(278 == filteredBI->next()); // charge. 1238 filteredBI->first(); 1239 prtbrks(filteredBI.getAlias(), text, *this); 1240 } 1241 } 1242 1243 { 1244 logln("Constructing empty builder\n"); 1245 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1246 TEST_ASSERT_SUCCESS(status); 1247 1248 if (U_SUCCESS(status)) { 1249 logln("Adding Mr. as an exception\n"); 1250 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1251 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it 1252 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status)); 1253 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it 1254 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1255 TEST_ASSERT_SUCCESS(status); 1256 1257 logln("Constructing base BI\n"); 1258 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1259 TEST_ASSERT_SUCCESS(status); 1260 1261 logln("Building new BI\n"); 1262 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1263 TEST_ASSERT_SUCCESS(status); 1264 1265 logln("Testing:"); 1266 filteredBI->setText(text); 1267 TEST_ASSERT(84 == filteredBI->next()); 1268 TEST_ASSERT(90 == filteredBI->next());// Capt. 1269 TEST_ASSERT(278 == filteredBI->next()); 1270 filteredBI->first(); 1271 prtbrks(filteredBI.getAlias(), text, *this); 1272 } 1273 } 1274 1275 1276 { 1277 logln("Constructing empty builder\n"); 1278 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1279 TEST_ASSERT_SUCCESS(status); 1280 1281 if (U_SUCCESS(status)) { 1282 logln("Adding Mr. and Capt as an exception\n"); 1283 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1284 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status)); 1285 TEST_ASSERT_SUCCESS(status); 1286 1287 logln("Constructing base BI\n"); 1288 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1289 TEST_ASSERT_SUCCESS(status); 1290 1291 logln("Building new BI\n"); 1292 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1293 TEST_ASSERT_SUCCESS(status); 1294 1295 logln("Testing:"); 1296 filteredBI->setText(text); 1297 TEST_ASSERT(84 == filteredBI->next()); 1298 TEST_ASSERT(278 == filteredBI->next()); 1299 filteredBI->first(); 1300 prtbrks(filteredBI.getAlias(), text, *this); 1301 } 1302 } 1303 1304 1305 { 1306 logln("Constructing English builder\n"); 1307 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status)); 1308 TEST_ASSERT_SUCCESS(status); 1309 1310 logln("Constructing base BI\n"); 1311 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1312 TEST_ASSERT_SUCCESS(status); 1313 1314 if (U_SUCCESS(status)) { 1315 logln("unsuppressing 'Capt'"); 1316 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status)); 1317 1318 logln("Building new BI\n"); 1319 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1320 TEST_ASSERT_SUCCESS(status); 1321 1322 if(filteredBI.isValid()) { 1323 logln("Testing:"); 1324 filteredBI->setText(text); 1325 TEST_ASSERT(84 == filteredBI->next()); 1326 TEST_ASSERT(90 == filteredBI->next()); 1327 TEST_ASSERT(278 == filteredBI->next()); 1328 filteredBI->first(); 1329 prtbrks(filteredBI.getAlias(), text, *this); 1330 } 1331 } 1332 } 1333 1334 1335 { 1336 logln("Constructing English builder\n"); 1337 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status)); 1338 TEST_ASSERT_SUCCESS(status); 1339 1340 logln("Constructing base BI\n"); 1341 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1342 TEST_ASSERT_SUCCESS(status); 1343 1344 if (U_SUCCESS(status)) { 1345 logln("Building new BI\n"); 1346 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1347 TEST_ASSERT_SUCCESS(status); 1348 1349 if(filteredBI.isValid()) { 1350 logln("Testing:"); 1351 filteredBI->setText(text); 1352 TEST_ASSERT(84 == filteredBI->next()); 1353 TEST_ASSERT(278 == filteredBI->next()); 1354 filteredBI->first(); 1355 prtbrks(filteredBI.getAlias(), text, *this); 1356 } 1357 } 1358 } 1359 1360 // reenable once french is in 1361 { 1362 logln("Constructing French builder"); 1363 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status)); 1364 TEST_ASSERT_SUCCESS(status); 1365 1366 logln("Constructing base BI\n"); 1367 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status)); 1368 TEST_ASSERT_SUCCESS(status); 1369 1370 if (U_SUCCESS(status)) { 1371 logln("Building new BI\n"); 1372 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1373 TEST_ASSERT_SUCCESS(status); 1374 } 1375 1376 if(frenchBI.isValid()) { 1377 logln("Testing:"); 1378 UnicodeString frText("C'est MM. Duval."); 1379 frenchBI->setText(frText); 1380 TEST_ASSERT(16 == frenchBI->next()); 1381 TEST_ASSERT(BreakIterator::DONE == frenchBI->next()); 1382 frenchBI->first(); 1383 prtbrks(frenchBI.getAlias(), frText, *this); 1384 logln("Testing against English:"); 1385 filteredBI->setText(frText); 1386 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english. 1387 TEST_ASSERT(16 == filteredBI->next()); 1388 TEST_ASSERT(BreakIterator::DONE == filteredBI->next()); 1389 filteredBI->first(); 1390 prtbrks(filteredBI.getAlias(), frText, *this); 1391 1392 // Verify == 1393 TEST_ASSERT_TRUE(*frenchBI == *frenchBI); 1394 TEST_ASSERT_TRUE(*filteredBI != *frenchBI); 1395 TEST_ASSERT_TRUE(*frenchBI != *filteredBI); 1396 } else { 1397 dataerrln("French BI: not valid."); 1398 } 1399 } 1400 1401#else 1402 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION"); 1403#endif 1404} 1405 1406//--------------------------------------------- 1407// runIndexedTest 1408//--------------------------------------------- 1409 1410void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 1411{ 1412 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); 1413 TESTCASE_AUTO_BEGIN; 1414#if !UCONFIG_NO_FILE_IO 1415 TESTCASE_AUTO(TestCloneEquals); 1416 TESTCASE_AUTO(TestgetRules); 1417 TESTCASE_AUTO(TestHashCode); 1418 TESTCASE_AUTO(TestGetSetAdoptText); 1419 TESTCASE_AUTO(TestIteration); 1420#endif 1421 TESTCASE_AUTO(TestBuilder); 1422 TESTCASE_AUTO(TestQuoteGrouping); 1423 TESTCASE_AUTO(TestRuleStatusVec); 1424 TESTCASE_AUTO(TestBug2190); 1425#if !UCONFIG_NO_FILE_IO 1426 TESTCASE_AUTO(TestRegistration); 1427 TESTCASE_AUTO(TestBoilerPlate); 1428 TESTCASE_AUTO(TestRuleStatus); 1429 TESTCASE_AUTO(TestRoundtripRules); 1430 TESTCASE_AUTO(TestGetBinaryRules); 1431#endif 1432 TESTCASE_AUTO(TestRefreshInputText); 1433#if !UCONFIG_NO_BREAK_ITERATION 1434 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder); 1435#endif 1436 TESTCASE_AUTO_END; 1437} 1438 1439 1440//--------------------------------------------- 1441//Internal subroutines 1442//--------------------------------------------- 1443 1444void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){ 1445 logln((UnicodeString)"testIsBoundary():"); 1446 int32_t p = 0; 1447 UBool isB; 1448 for (int32_t i = 0; i < text.length(); i++) { 1449 isB = bi.isBoundary(i); 1450 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); 1451 1452 if (i == boundaries[p]) { 1453 if (!isB) 1454 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); 1455 p++; 1456 } 1457 else { 1458 if (isB) 1459 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); 1460 } 1461 } 1462} 1463void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ 1464 UnicodeString selected; 1465 UnicodeString expected=CharsToUnicodeString(expectedString); 1466 1467 if(gotoffset != expectedOffset) 1468 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); 1469 if(start <= gotoffset){ 1470 testString.extractBetween(start, gotoffset, selected); 1471 } 1472 else{ 1473 testString.extractBetween(gotoffset, start, selected); 1474 } 1475 if(selected.compare(expected) != 0) 1476 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); 1477 else 1478 logln(prettify("****selected \"" + selected + "\"")); 1479} 1480 1481#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1482