1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/******************************************************************** 4 * Copyright (c) 1999-2016, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 ******************************************************************** 7 * Date Name Description 8 * 12/14/99 Madhu Creation. 9 * 01/12/2000 Madhu updated for changed API 10 ********************************************************************/ 11 12#include "unicode/utypes.h" 13 14#if !UCONFIG_NO_BREAK_ITERATION 15 16#include "unicode/uchar.h" 17#include "intltest.h" 18#include "unicode/rbbi.h" 19#include "unicode/schriter.h" 20#include "rbbiapts.h" 21#include "rbbidata.h" 22#include "cstring.h" 23#include "ubrkimpl.h" 24#include "unicode/locid.h" 25#include "unicode/ustring.h" 26#include "unicode/utext.h" 27#include "cmemory.h" 28#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING 29#include "unicode/filteredbrk.h" 30#include <stdio.h> // for sprintf 31#endif 32/** 33 * API Test the RuleBasedBreakIterator class 34 */ 35 36 37#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ 38dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} 39 40#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \ 41 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} 42 43void RBBIAPITest::TestCloneEquals() 44{ 45 46 UErrorCode status=U_ZERO_ERROR; 47 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 48 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 49 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 50 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 51 if(U_FAILURE(status)){ 52 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 53 return; 54 } 55 56 57 UnicodeString testString="Testing word break iterators's clone() and equals()"; 58 bi1->setText(testString); 59 bi2->setText(testString); 60 biequal->setText(testString); 61 62 bi3->setText("hello"); 63 64 logln((UnicodeString)"Testing equals()"); 65 66 logln((UnicodeString)"Testing == and !="); 67 UBool b = (*bi1 != *biequal); 68 b |= *bi1 == *bi2; 69 b |= *bi1 == *bi3; 70 if (b) { 71 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); 72 } 73 74 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) 75 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); 76 77 78 // Quick test of RulesBasedBreakIterator assignment - 79 // Check that 80 // two different iterators are != 81 // they are == after assignment 82 // source and dest iterator produce the same next() after assignment. 83 // deleting one doesn't disable the other. 84 logln("Testing assignment"); 85 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 86 if(U_FAILURE(status)){ 87 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 88 return; 89 } 90 91 RuleBasedBreakIterator biDefault, biDefault2; 92 if(U_FAILURE(status)){ 93 errln((UnicodeString)"FAIL : in construction of default iterator"); 94 return; 95 } 96 if (biDefault == *bix) { 97 errln((UnicodeString)"ERROR: iterators should not compare =="); 98 return; 99 } 100 if (biDefault != biDefault2) { 101 errln((UnicodeString)"ERROR: iterators should compare =="); 102 return; 103 } 104 105 106 UnicodeString HelloString("Hello Kitty"); 107 bix->setText(HelloString); 108 if (*bix == *bi2) { 109 errln(UnicodeString("ERROR: strings should not be equal before assignment.")); 110 } 111 *bix = *bi2; 112 if (*bix != *bi2) { 113 errln(UnicodeString("ERROR: strings should be equal before assignment.")); 114 } 115 116 int bixnext = bix->next(); 117 int bi2next = bi2->next(); 118 if (! (bixnext == bi2next && bixnext == 7)) { 119 errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); 120 } 121 delete bix; 122 if (bi2->next() != 8) { 123 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); 124 } 125 126 127 128 logln((UnicodeString)"Testing clone()"); 129 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); 130 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); 131 132 if(*bi1clone != *bi1 || *bi1clone != *biequal || 133 *bi1clone == *bi3 || *bi1clone == *bi2) 134 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); 135 136 if(*bi2clone == *bi1 || *bi2clone == *biequal || 137 *bi2clone == *bi3 || *bi2clone != *bi2) 138 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); 139 140 if(bi1->getText() != bi1clone->getText() || 141 bi2clone->getText() != bi2->getText() || 142 *bi2clone == *bi1clone ) 143 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); 144 145 delete bi1clone; 146 delete bi2clone; 147 delete bi1; 148 delete bi3; 149 delete bi2; 150 delete biequal; 151} 152 153void RBBIAPITest::TestBoilerPlate() 154{ 155 UErrorCode status = U_ZERO_ERROR; 156 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); 157 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); 158 if (U_FAILURE(status)) { 159 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 160 return; 161 } 162 if(*a!=*b){ 163 errln("Failed: boilerplate method operator!= does not return correct results"); 164 } 165 // Japanese word break iterators are identical to root with 166 // a dictionary-based break iterator 167 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status); 168 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status); 169 if(c && d){ 170 if(*c!=*d){ 171 errln("Failed: boilerplate method operator== does not return correct results"); 172 } 173 }else{ 174 errln("creation of break iterator failed"); 175 } 176 delete a; 177 delete b; 178 delete c; 179 delete d; 180} 181 182void RBBIAPITest::TestgetRules() 183{ 184 UErrorCode status=U_ZERO_ERROR; 185 186 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 187 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 188 if(U_FAILURE(status)){ 189 errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); 190 delete bi1; 191 delete bi2; 192 return; 193 } 194 195 196 197 logln((UnicodeString)"Testing toString()"); 198 199 bi1->setText((UnicodeString)"Hello there"); 200 201 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); 202 203 UnicodeString temp=bi1->getRules(); 204 UnicodeString temp2=bi2->getRules(); 205 UnicodeString temp3=bi3->getRules(); 206 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) 207 errln((UnicodeString)"ERROR: error in getRules() method"); 208 209 delete bi1; 210 delete bi2; 211 delete bi3; 212} 213void RBBIAPITest::TestHashCode() 214{ 215 UErrorCode status=U_ZERO_ERROR; 216 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 217 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 218 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 219 if(U_FAILURE(status)){ 220 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 221 delete bi1; 222 delete bi2; 223 delete bi3; 224 return; 225 } 226 227 228 logln((UnicodeString)"Testing hashCode()"); 229 230 bi1->setText((UnicodeString)"Hash code"); 231 bi2->setText((UnicodeString)"Hash code"); 232 bi3->setText((UnicodeString)"Hash code"); 233 234 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); 235 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); 236 237 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || 238 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) 239 errln((UnicodeString)"ERROR: identical objects have different hashcodes"); 240 241 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || 242 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) 243 errln((UnicodeString)"ERROR: different objects have same hashcodes"); 244 245 delete bi1clone; 246 delete bi2clone; 247 delete bi1; 248 delete bi2; 249 delete bi3; 250 251} 252void RBBIAPITest::TestGetSetAdoptText() 253{ 254 logln((UnicodeString)"Testing getText setText "); 255 IcuTestErrorCode status(*this, "TestGetSetAdoptText"); 256 UnicodeString str1="first string."; 257 UnicodeString str2="Second string."; 258 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); 259 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); 260 if(status.isFailure()){ 261 errcheckln(status, "Fail : in construction - %s", status.errorName()); 262 return; 263 } 264 265 266 CharacterIterator* text1= new StringCharacterIterator(str1); 267 CharacterIterator* text1Clone = text1->clone(); 268 CharacterIterator* text2= new StringCharacterIterator(str2); 269 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" 270 271 wordIter1->setText(str1); 272 CharacterIterator *tci = &wordIter1->getText(); 273 UnicodeString tstr; 274 tci->getText(tstr); 275 TEST_ASSERT(tstr == str1); 276 if(wordIter1->current() != 0) 277 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 278 279 wordIter1->next(2); 280 281 wordIter1->setText(str2); 282 if(wordIter1->current() != 0) 283 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 284 285 286 charIter1->adoptText(text1Clone); 287 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); 288 tci = &wordIter1->getText(); 289 tci->getText(tstr); 290 TEST_ASSERT(tstr == str2); 291 tci = &charIter1->getText(); 292 tci->getText(tstr); 293 TEST_ASSERT(tstr == str1); 294 295 296 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone()); 297 rb->adoptText(text1); 298 if(rb->getText() != *text1) 299 errln((UnicodeString)"ERROR:1 error in adoptText "); 300 rb->adoptText(text2); 301 if(rb->getText() != *text2) 302 errln((UnicodeString)"ERROR:2 error in adoptText "); 303 304 // Adopt where iterator range is less than the entire orignal source string. 305 // (With the change of the break engine to working with UText internally, 306 // CharacterIterators starting at positions other than zero are not supported) 307 rb->adoptText(text3); 308 TEST_ASSERT(rb->preceding(2) == 0); 309 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); 310 //if(rb->preceding(2) != 3) { 311 // errln((UnicodeString)"ERROR:3 error in adoptText "); 312 //} 313 //if(rb->following(11) != BreakIterator::DONE) { 314 // errln((UnicodeString)"ERROR:4 error in adoptText "); 315 //} 316 317 // UText API 318 // 319 // Quick test to see if UText is working at all. 320 // 321 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ 322 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ 323 // 012345678901 324 325 status.reset(); 326 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); 327 wordIter1->setText(ut.getAlias(), status); 328 TEST_ASSERT_SUCCESS(status); 329 330 int32_t pos; 331 pos = wordIter1->first(); 332 TEST_ASSERT(pos==0); 333 pos = wordIter1->next(); 334 TEST_ASSERT(pos==5); 335 pos = wordIter1->next(); 336 TEST_ASSERT(pos==6); 337 pos = wordIter1->next(); 338 TEST_ASSERT(pos==11); 339 pos = wordIter1->next(); 340 TEST_ASSERT(pos==UBRK_DONE); 341 342 status.reset(); 343 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); 344 TEST_ASSERT_SUCCESS(status); 345 wordIter1->setText(ut2.getAlias(), status); 346 TEST_ASSERT_SUCCESS(status); 347 348 pos = wordIter1->first(); 349 TEST_ASSERT(pos==0); 350 pos = wordIter1->next(); 351 TEST_ASSERT(pos==3); 352 pos = wordIter1->next(); 353 TEST_ASSERT(pos==4); 354 355 pos = wordIter1->last(); 356 TEST_ASSERT(pos==6); 357 pos = wordIter1->previous(); 358 TEST_ASSERT(pos==4); 359 pos = wordIter1->previous(); 360 TEST_ASSERT(pos==3); 361 pos = wordIter1->previous(); 362 TEST_ASSERT(pos==0); 363 pos = wordIter1->previous(); 364 TEST_ASSERT(pos==UBRK_DONE); 365 366 status.reset(); 367 UnicodeString sEmpty; 368 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); 369 wordIter1->getUText(gut2.getAlias(), status); 370 TEST_ASSERT_SUCCESS(status); 371 status.reset(); 372} 373 374 375void RBBIAPITest::TestIteration() 376{ 377 // This test just verifies that the API is present. 378 // Testing for correct operation of the break rules happens elsewhere. 379 380 UErrorCode status=U_ZERO_ERROR; 381 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 382 if (U_FAILURE(status) || bi == NULL) { 383 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 384 } 385 delete bi; 386 387 status=U_ZERO_ERROR; 388 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 389 if (U_FAILURE(status) || bi == NULL) { 390 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); 391 } 392 delete bi; 393 394 status=U_ZERO_ERROR; 395 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); 396 if (U_FAILURE(status) || bi == NULL) { 397 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); 398 } 399 delete bi; 400 401 status=U_ZERO_ERROR; 402 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); 403 if (U_FAILURE(status) || bi == NULL) { 404 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); 405 } 406 delete bi; 407 408 status=U_ZERO_ERROR; 409 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); 410 if (U_FAILURE(status) || bi == NULL) { 411 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); 412 } 413 delete bi; 414 415 status=U_ZERO_ERROR; 416 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 417 if (U_FAILURE(status) || bi == NULL) { 418 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 419 return; // Skip the rest of these tests. 420 } 421 422 423 UnicodeString testString="0123456789"; 424 bi->setText(testString); 425 426 int32_t i; 427 i = bi->first(); 428 if (i != 0) { 429 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); 430 } 431 432 i = bi->last(); 433 if (i != 10) { 434 errln("Incorrect value from bi->last(). Expected 10, got %d", i); 435 } 436 437 // 438 // Previous 439 // 440 bi->last(); 441 i = bi->previous(); 442 if (i != 9) { 443 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); 444 } 445 446 447 bi->first(); 448 i = bi->previous(); 449 if (i != BreakIterator::DONE) { 450 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); 451 } 452 453 // 454 // next() 455 // 456 bi->first(); 457 i = bi->next(); 458 if (i != 1) { 459 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); 460 } 461 462 bi->last(); 463 i = bi->next(); 464 if (i != BreakIterator::DONE) { 465 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); 466 } 467 468 469 // 470 // current() 471 // 472 bi->first(); 473 i = bi->current(); 474 if (i != 0) { 475 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 476 } 477 478 bi->next(); 479 i = bi->current(); 480 if (i != 1) { 481 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); 482 } 483 484 bi->last(); 485 bi->next(); 486 i = bi->current(); 487 if (i != 10) { 488 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); 489 } 490 491 bi->first(); 492 bi->previous(); 493 i = bi->current(); 494 if (i != 0) { 495 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 496 } 497 498 499 // 500 // Following() 501 // 502 i = bi->following(4); 503 if (i != 5) { 504 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); 505 } 506 507 i = bi->following(9); 508 if (i != 10) { 509 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); 510 } 511 512 i = bi->following(10); 513 if (i != BreakIterator::DONE) { 514 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); 515 } 516 517 518 // 519 // Preceding 520 // 521 i = bi->preceding(4); 522 if (i != 3) { 523 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); 524 } 525 526 i = bi->preceding(10); 527 if (i != 9) { 528 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); 529 } 530 531 i = bi->preceding(1); 532 if (i != 0) { 533 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); 534 } 535 536 i = bi->preceding(0); 537 if (i != BreakIterator::DONE) { 538 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); 539 } 540 541 542 // 543 // isBoundary() 544 // 545 bi->first(); 546 if (bi->isBoundary(3) != TRUE) { 547 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); 548 } 549 i = bi->current(); 550 if (i != 3) { 551 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); 552 } 553 554 555 if (bi->isBoundary(11) != FALSE) { 556 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); 557 } 558 i = bi->current(); 559 if (i != 10) { 560 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); 561 } 562 563 // 564 // next(n) 565 // 566 bi->first(); 567 i = bi->next(4); 568 if (i != 4) { 569 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); 570 } 571 572 i = bi->next(6); 573 if (i != 10) { 574 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); 575 } 576 577 bi->first(); 578 i = bi->next(11); 579 if (i != BreakIterator::DONE) { 580 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); 581 } 582 583 delete bi; 584 585} 586 587 588 589 590 591 592void RBBIAPITest::TestBuilder() { 593 UnicodeString rulesString1 = "$Letters = [:L:];\n" 594 "$Numbers = [:N:];\n" 595 "$Letters+;\n" 596 "$Numbers+;\n" 597 "[^$Letters $Numbers];\n" 598 "!.*;\n"; 599 UnicodeString testString1 = "abc123..abc"; 600 // 01234567890 601 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; 602 UErrorCode status=U_ZERO_ERROR; 603 UParseError parseError; 604 605 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 606 if(U_FAILURE(status)) { 607 dataerrln("Fail : in construction - %s", u_errorName(status)); 608 } else { 609 bi->setText(testString1); 610 doBoundaryTest(*bi, testString1, bounds1); 611 } 612 delete bi; 613} 614 615 616// 617// TestQuoteGrouping 618// Single quotes within rules imply a grouping, so that a modifier 619// following the quoted text (* or +) applies to all of the quoted chars. 620// 621void RBBIAPITest::TestQuoteGrouping() { 622 UnicodeString rulesString1 = "#Here comes the rule...\n" 623 "'$@!'*;\n" // (\$\@\!)* 624 ".;\n"; 625 626 UnicodeString testString1 = "$@!$@!X$@!!X"; 627 // 0123456789012 628 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; 629 UErrorCode status=U_ZERO_ERROR; 630 UParseError parseError; 631 632 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 633 if(U_FAILURE(status)) { 634 dataerrln("Fail : in construction - %s", u_errorName(status)); 635 } else { 636 bi->setText(testString1); 637 doBoundaryTest(*bi, testString1, bounds1); 638 } 639 delete bi; 640} 641 642// 643// TestRuleStatus 644// Test word break rule status constants. 645// 646void RBBIAPITest::TestRuleStatus() { 647 UChar str[30]; 648 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing 649 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO 650 u_unescape("plain word 123.45 \\u30a1\\u30a2 ", 651 // 012345678901234567 8 9 0 652 // Katakana 653 str, 30); 654 UnicodeString testString1(str); 655 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; 656 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, 657 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, 658 UBRK_WORD_IDEO, UBRK_WORD_NONE}; 659 660 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, 661 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, 662 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; 663 664 UErrorCode status=U_ZERO_ERROR; 665 666 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status); 667 if(U_FAILURE(status)) { 668 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 669 } else { 670 bi->setText(testString1); 671 // First test that the breaks are in the right spots. 672 doBoundaryTest(*bi, testString1, bounds1); 673 674 // Then go back and check tag values 675 int32_t i = 0; 676 int32_t pos, tag; 677 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { 678 if (pos != bounds1[i]) { 679 errln("FAIL: unexpected word break at postion %d", pos); 680 break; 681 } 682 tag = bi->getRuleStatus(); 683 if (tag < tag_lo[i] || tag >= tag_hi[i]) { 684 errln("FAIL: incorrect tag value %d at position %d", tag, pos); 685 break; 686 } 687 688 // Check that we get the same tag values from getRuleStatusVec() 689 int32_t vec[10]; 690 int t = bi->getRuleStatusVec(vec, 10, status); 691 TEST_ASSERT_SUCCESS(status); 692 TEST_ASSERT(t==1); 693 TEST_ASSERT(vec[0] == tag); 694 } 695 } 696 delete bi; 697 698 // Now test line break status. This test mostly is to confirm that the status constants 699 // are correctly declared in the header. 700 testString1 = "test line. \n"; 701 // break type s s h 702 703 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status); 704 if(U_FAILURE(status)) { 705 errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); 706 } else { 707 int32_t i = 0; 708 int32_t pos, tag; 709 UBool success; 710 711 bi->setText(testString1); 712 pos = bi->current(); 713 tag = bi->getRuleStatus(); 714 for (i=0; i<3; i++) { 715 switch (i) { 716 case 0: 717 success = pos==0 && tag==UBRK_LINE_SOFT; break; 718 case 1: 719 success = pos==5 && tag==UBRK_LINE_SOFT; break; 720 case 2: 721 success = pos==12 && tag==UBRK_LINE_HARD; break; 722 default: 723 success = FALSE; break; 724 } 725 if (success == FALSE) { 726 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", 727 i, pos, tag); 728 break; 729 } 730 pos = bi->next(); 731 tag = bi->getRuleStatus(); 732 } 733 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || 734 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || 735 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { 736 errln("UBRK_LINE_* constants from header are inconsistent."); 737 } 738 } 739 delete bi; 740 741} 742 743 744// 745// TestRuleStatusVec 746// Test the vector form of break rule status. 747// 748void RBBIAPITest::TestRuleStatusVec() { 749 UnicodeString rulesString( "[A-N]{100}; \n" 750 "[a-w]{200}; \n" 751 "[\\p{L}]{300}; \n" 752 "[\\p{N}]{400}; \n" 753 "[0-5]{500}; \n" 754 "!.*;\n", -1, US_INV); 755 UnicodeString testString1 = "Aapz5?"; 756 int32_t statusVals[10]; 757 int32_t numStatuses; 758 int32_t pos; 759 760 UErrorCode status=U_ZERO_ERROR; 761 UParseError parseError; 762 763 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); 764 if (U_FAILURE(status)) { 765 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); 766 } else { 767 bi->setText(testString1); 768 769 // A 770 pos = bi->next(); 771 TEST_ASSERT(pos==1); 772 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 773 TEST_ASSERT_SUCCESS(status); 774 TEST_ASSERT(numStatuses == 2); 775 TEST_ASSERT(statusVals[0] == 100); 776 TEST_ASSERT(statusVals[1] == 300); 777 778 // a 779 pos = bi->next(); 780 TEST_ASSERT(pos==2); 781 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 782 TEST_ASSERT_SUCCESS(status); 783 TEST_ASSERT(numStatuses == 2); 784 TEST_ASSERT(statusVals[0] == 200); 785 TEST_ASSERT(statusVals[1] == 300); 786 787 // p 788 pos = bi->next(); 789 TEST_ASSERT(pos==3); 790 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 791 TEST_ASSERT_SUCCESS(status); 792 TEST_ASSERT(numStatuses == 2); 793 TEST_ASSERT(statusVals[0] == 200); 794 TEST_ASSERT(statusVals[1] == 300); 795 796 // z 797 pos = bi->next(); 798 TEST_ASSERT(pos==4); 799 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 800 TEST_ASSERT_SUCCESS(status); 801 TEST_ASSERT(numStatuses == 1); 802 TEST_ASSERT(statusVals[0] == 300); 803 804 // 5 805 pos = bi->next(); 806 TEST_ASSERT(pos==5); 807 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 808 TEST_ASSERT_SUCCESS(status); 809 TEST_ASSERT(numStatuses == 2); 810 TEST_ASSERT(statusVals[0] == 400); 811 TEST_ASSERT(statusVals[1] == 500); 812 813 // ? 814 pos = bi->next(); 815 TEST_ASSERT(pos==6); 816 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 817 TEST_ASSERT_SUCCESS(status); 818 TEST_ASSERT(numStatuses == 1); 819 TEST_ASSERT(statusVals[0] == 0); 820 821 // 822 // Check buffer overflow error handling. Char == A 823 // 824 bi->first(); 825 pos = bi->next(); 826 TEST_ASSERT(pos==1); 827 memset(statusVals, -1, sizeof(statusVals)); 828 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); 829 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 830 TEST_ASSERT(numStatuses == 2); 831 TEST_ASSERT(statusVals[0] == -1); 832 833 status = U_ZERO_ERROR; 834 memset(statusVals, -1, sizeof(statusVals)); 835 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); 836 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 837 TEST_ASSERT(numStatuses == 2); 838 TEST_ASSERT(statusVals[0] == 100); 839 TEST_ASSERT(statusVals[1] == -1); 840 841 status = U_ZERO_ERROR; 842 memset(statusVals, -1, sizeof(statusVals)); 843 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); 844 TEST_ASSERT_SUCCESS(status); 845 TEST_ASSERT(numStatuses == 2); 846 TEST_ASSERT(statusVals[0] == 100); 847 TEST_ASSERT(statusVals[1] == 300); 848 TEST_ASSERT(statusVals[2] == -1); 849 } 850 delete bi; 851 852} 853 854// 855// Bug 2190 Regression test. Builder crash on rule consisting of only a 856// $variable reference 857void RBBIAPITest::TestBug2190() { 858 UnicodeString rulesString1 = "$aaa = abcd;\n" 859 "$bbb = $aaa;\n" 860 "$bbb;\n"; 861 UnicodeString testString1 = "abcdabcd"; 862 // 01234567890 863 int32_t bounds1[] = {0, 4, 8}; 864 UErrorCode status=U_ZERO_ERROR; 865 UParseError parseError; 866 867 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 868 if(U_FAILURE(status)) { 869 dataerrln("Fail : in construction - %s", u_errorName(status)); 870 } else { 871 bi->setText(testString1); 872 doBoundaryTest(*bi, testString1, bounds1); 873 } 874 delete bi; 875} 876 877 878void RBBIAPITest::TestRegistration() { 879#if !UCONFIG_NO_SERVICE 880 UErrorCode status = U_ZERO_ERROR; 881 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); 882 // ok to not delete these if we exit because of error? 883 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); 884 BreakIterator* root_word = BreakIterator::createWordInstance("", status); 885 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); 886 887 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { 888 dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); 889 890 delete ja_word; 891 delete ja_char; 892 delete root_word; 893 delete root_char; 894 895 return; 896 } 897 898 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); 899 { 900#if 0 // With a dictionary based word breaking, ja_word is identical to root. 901 if (ja_word && *ja_word == *root_word) { 902 errln("japan not different from root"); 903 } 904#endif 905 } 906 907 { 908 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); 909 UBool fail = TRUE; 910 if(result){ 911 fail = *result != *ja_word; 912 } 913 delete result; 914 if (fail) { 915 errln("bad result for xx_XX/word"); 916 } 917 } 918 919 { 920 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); 921 UBool fail = TRUE; 922 if(result){ 923 fail = *result != *ja_char; 924 } 925 delete result; 926 if (fail) { 927 errln("bad result for ja_JP/char"); 928 } 929 } 930 931 { 932 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); 933 UBool fail = TRUE; 934 if(result){ 935 fail = *result != *root_char; 936 } 937 delete result; 938 if (fail) { 939 errln("bad result for xx_XX/char"); 940 } 941 } 942 943 { 944 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 945 UBool found = FALSE; 946 const UnicodeString* p; 947 while ((p = avail->snext(status))) { 948 if (p->compare("xx") == 0) { 949 found = TRUE; 950 break; 951 } 952 } 953 delete avail; 954 if (!found) { 955 errln("did not find test locale"); 956 } 957 } 958 959 { 960 UBool unreg = BreakIterator::unregister(key, status); 961 if (!unreg) { 962 errln("unable to unregister"); 963 } 964 } 965 966 { 967 BreakIterator* result = BreakIterator::createWordInstance("en_US", status); 968 BreakIterator* root = BreakIterator::createWordInstance("", status); 969 UBool fail = TRUE; 970 if(root){ 971 fail = *root != *result; 972 } 973 delete root; 974 delete result; 975 if (fail) { 976 errln("did not get root break"); 977 } 978 } 979 980 { 981 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 982 UBool found = FALSE; 983 const UnicodeString* p; 984 while ((p = avail->snext(status))) { 985 if (p->compare("xx") == 0) { 986 found = TRUE; 987 break; 988 } 989 } 990 delete avail; 991 if (found) { 992 errln("found test locale"); 993 } 994 } 995 996 { 997 int32_t count; 998 UBool foundLocale = FALSE; 999 const Locale *avail = BreakIterator::getAvailableLocales(count); 1000 for (int i=0; i<count; i++) { 1001 if (avail[i] == Locale::getEnglish()) { 1002 foundLocale = TRUE; 1003 break; 1004 } 1005 } 1006 if (foundLocale == FALSE) { 1007 errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); 1008 } 1009 } 1010 1011 1012 // ja_word was adopted by factory 1013 delete ja_char; 1014 delete root_word; 1015 delete root_char; 1016#endif 1017} 1018 1019void RBBIAPITest::RoundtripRule(const char *dataFile) { 1020 UErrorCode status = U_ZERO_ERROR; 1021 UParseError parseError; 1022 parseError.line = 0; 1023 parseError.offset = 0; 1024 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); 1025 uint32_t length; 1026 const UChar *builtSource; 1027 const uint8_t *rbbiRules; 1028 const uint8_t *builtRules; 1029 1030 if (U_FAILURE(status)) { 1031 errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status)); 1032 return; 1033 } 1034 1035 builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); 1036 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); 1037 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); 1038 if (U_FAILURE(status)) { 1039 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", 1040 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset); 1041 errln(UnicodeString(builtSource)); 1042 return; 1043 }; 1044 rbbiRules = brkItr->getBinaryRules(length); 1045 logln("Comparing \"%s\" len=%d", dataFile, length); 1046 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { 1047 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile); 1048 return; 1049 } 1050 delete brkItr; 1051} 1052 1053void RBBIAPITest::TestRoundtripRules() { 1054 RoundtripRule("word"); 1055 RoundtripRule("title"); 1056 RoundtripRule("sent"); 1057 RoundtripRule("line"); 1058 RoundtripRule("char"); 1059 if (!quick) { 1060 RoundtripRule("word_POSIX"); 1061 } 1062} 1063 1064 1065// Check getBinaryRules() and construction of a break iterator from those rules. 1066 1067void RBBIAPITest::TestGetBinaryRules() { 1068 UErrorCode status=U_ZERO_ERROR; 1069 LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status)); 1070 TEST_ASSERT_SUCCESS(status); 1071 RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias()); 1072 TEST_ASSERT(rbbi != NULL); 1073 1074 // Check that the new line break iterator is nominally functional. 1075 UnicodeString helloWorld("Hello, World!"); 1076 rbbi->setText(helloWorld); 1077 int n = 0; 1078 while (bi->next() != UBRK_DONE) { 1079 ++n; 1080 } 1081 TEST_ASSERT(n == 2); 1082 1083 // Extract the binary rules as a uint8_t blob. 1084 uint32_t ruleLength; 1085 const uint8_t *binRules = rbbi->getBinaryRules(ruleLength); 1086 TEST_ASSERT(ruleLength > 0); 1087 TEST_ASSERT(binRules != NULL); 1088 1089 // Clone the binary rules, and create a break iterator from that. 1090 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator. 1091 uint8_t *clonedRules = new uint8_t[ruleLength]; 1092 memcpy(clonedRules, binRules, ruleLength); 1093 RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status); 1094 TEST_ASSERT_SUCCESS(status); 1095 1096 // Check that the cloned line break iterator is nominally alive. 1097 clonedBI.setText(helloWorld); 1098 n = 0; 1099 while (clonedBI.next() != UBRK_DONE) { 1100 ++n; 1101 } 1102 TEST_ASSERT(n == 2); 1103 1104 delete[] clonedRules; 1105} 1106 1107 1108void RBBIAPITest::TestRefreshInputText() { 1109 /* 1110 * RefreshInput changes out the input of a Break Iterator without 1111 * changing anything else in the iterator's state. Used with Java JNI, 1112 * when Java moves the underlying string storage. This test 1113 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. 1114 * The right set of boundaries should still be found. 1115 */ 1116 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ 1117 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; 1118 UErrorCode status = U_ZERO_ERROR; 1119 UText ut1 = UTEXT_INITIALIZER; 1120 UText ut2 = UTEXT_INITIALIZER; 1121 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); 1122 TEST_ASSERT_SUCCESS(status); 1123 1124 utext_openUChars(&ut1, testStr, -1, &status); 1125 TEST_ASSERT_SUCCESS(status); 1126 1127 if (U_SUCCESS(status)) { 1128 bi->setText(&ut1, status); 1129 TEST_ASSERT_SUCCESS(status); 1130 1131 /* Line boundaries will occur before each letter in the original string */ 1132 TEST_ASSERT(1 == bi->next()); 1133 TEST_ASSERT(3 == bi->next()); 1134 1135 /* Move the string, kill the original string. */ 1136 u_strcpy(movedStr, testStr); 1137 u_memset(testStr, 0x20, u_strlen(testStr)); 1138 utext_openUChars(&ut2, movedStr, -1, &status); 1139 TEST_ASSERT_SUCCESS(status); 1140 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status); 1141 TEST_ASSERT_SUCCESS(status); 1142 TEST_ASSERT(bi == returnedBI); 1143 1144 /* Find the following matches, now working in the moved string. */ 1145 TEST_ASSERT(5 == bi->next()); 1146 TEST_ASSERT(7 == bi->next()); 1147 TEST_ASSERT(8 == bi->next()); 1148 TEST_ASSERT(UBRK_DONE == bi->next()); 1149 1150 utext_close(&ut1); 1151 utext_close(&ut2); 1152 } 1153 delete bi; 1154 1155} 1156 1157#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION 1158static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) { 1159 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets 1160 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'")); 1161 1162 int32_t *pos = new int32_t[ustr.length()]; 1163 int32_t posCount = 0; 1164 1165 // calculate breaks up front, so we can print out 1166 // sans any debugging 1167 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) { 1168 pos[posCount++] = n; 1169 if(posCount>=ustr.length()) { 1170 it.errln("brk count exceeds string length!"); 1171 return; 1172 } 1173 } 1174 UnicodeString out; 1175 out.append((UChar)CHSTR); 1176 int32_t prev = 0; 1177 for(int32_t i=0;i<posCount;i++) { 1178 int32_t n=pos[i]; 1179 out.append(ustr.tempSubString(prev,n-prev)); 1180 out.append((UChar)PILCROW); 1181 prev=n; 1182 } 1183 out.append(ustr.tempSubString(prev,ustr.length()-prev)); 1184 out.append((UChar)CHEND); 1185 it.logln(out); 1186 1187 out.remove(); 1188 for(int32_t i=0;i<posCount;i++) { 1189 char tmp[100]; 1190 sprintf(tmp,"%d ",pos[i]); 1191 out.append(UnicodeString(tmp)); 1192 } 1193 it.logln(out); 1194 delete [] pos; 1195} 1196#endif 1197 1198void RBBIAPITest::TestFilteredBreakIteratorBuilder() { 1199#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION 1200 UErrorCode status = U_ZERO_ERROR; 1201 LocalPointer<FilteredBreakIteratorBuilder> builder; 1202 LocalPointer<BreakIterator> baseBI; 1203 LocalPointer<BreakIterator> filteredBI; 1204 LocalPointer<BreakIterator> frenchBI; 1205 1206 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited. 1207 const UnicodeString ABBR_MR("Mr."); 1208 const UnicodeString ABBR_CAPT("Capt."); 1209 1210 { 1211 logln("Constructing empty builder\n"); 1212 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1213 TEST_ASSERT_SUCCESS(status); 1214 1215 logln("Constructing base BI\n"); 1216 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1217 TEST_ASSERT_SUCCESS(status); 1218 1219 logln("Building new BI\n"); 1220 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1221 TEST_ASSERT_SUCCESS(status); 1222 1223 if (U_SUCCESS(status)) { 1224 logln("Testing:"); 1225 filteredBI->setText(text); 1226 TEST_ASSERT(20 == filteredBI->next()); // Mr. 1227 TEST_ASSERT(84 == filteredBI->next()); // recovered. 1228 TEST_ASSERT(90 == filteredBI->next()); // Capt. 1229 TEST_ASSERT(181 == filteredBI->next()); // Mr. 1230 TEST_ASSERT(278 == filteredBI->next()); // charge. 1231 filteredBI->first(); 1232 prtbrks(filteredBI.getAlias(), text, *this); 1233 } 1234 } 1235 1236 { 1237 logln("Constructing empty builder\n"); 1238 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1239 TEST_ASSERT_SUCCESS(status); 1240 1241 if (U_SUCCESS(status)) { 1242 logln("Adding Mr. as an exception\n"); 1243 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1244 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it 1245 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status)); 1246 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it 1247 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1248 TEST_ASSERT_SUCCESS(status); 1249 1250 logln("Constructing base BI\n"); 1251 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1252 TEST_ASSERT_SUCCESS(status); 1253 1254 logln("Building new BI\n"); 1255 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1256 TEST_ASSERT_SUCCESS(status); 1257 1258 logln("Testing:"); 1259 filteredBI->setText(text); 1260 TEST_ASSERT(84 == filteredBI->next()); 1261 TEST_ASSERT(90 == filteredBI->next());// Capt. 1262 TEST_ASSERT(278 == filteredBI->next()); 1263 filteredBI->first(); 1264 prtbrks(filteredBI.getAlias(), text, *this); 1265 } 1266 } 1267 1268 1269 { 1270 logln("Constructing empty builder\n"); 1271 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); 1272 TEST_ASSERT_SUCCESS(status); 1273 1274 if (U_SUCCESS(status)) { 1275 logln("Adding Mr. and Capt as an exception\n"); 1276 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); 1277 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status)); 1278 TEST_ASSERT_SUCCESS(status); 1279 1280 logln("Constructing base BI\n"); 1281 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1282 TEST_ASSERT_SUCCESS(status); 1283 1284 logln("Building new BI\n"); 1285 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1286 TEST_ASSERT_SUCCESS(status); 1287 1288 logln("Testing:"); 1289 filteredBI->setText(text); 1290 TEST_ASSERT(84 == filteredBI->next()); 1291 TEST_ASSERT(278 == filteredBI->next()); 1292 filteredBI->first(); 1293 prtbrks(filteredBI.getAlias(), text, *this); 1294 } 1295 } 1296 1297 1298 { 1299 logln("Constructing English builder\n"); 1300 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status)); 1301 TEST_ASSERT_SUCCESS(status); 1302 1303 logln("Constructing base BI\n"); 1304 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1305 TEST_ASSERT_SUCCESS(status); 1306 1307 if (U_SUCCESS(status)) { 1308 logln("unsuppressing 'Capt'"); 1309 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status)); 1310 1311 logln("Building new BI\n"); 1312 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1313 TEST_ASSERT_SUCCESS(status); 1314 1315 if(filteredBI.isValid()) { 1316 logln("Testing:"); 1317 filteredBI->setText(text); 1318 TEST_ASSERT(84 == filteredBI->next()); 1319 TEST_ASSERT(90 == filteredBI->next()); 1320 TEST_ASSERT(278 == filteredBI->next()); 1321 filteredBI->first(); 1322 prtbrks(filteredBI.getAlias(), text, *this); 1323 } 1324 } 1325 } 1326 1327 1328 { 1329 logln("Constructing English builder\n"); 1330 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status)); 1331 TEST_ASSERT_SUCCESS(status); 1332 1333 logln("Constructing base BI\n"); 1334 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status)); 1335 TEST_ASSERT_SUCCESS(status); 1336 1337 if (U_SUCCESS(status)) { 1338 logln("Building new BI\n"); 1339 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1340 TEST_ASSERT_SUCCESS(status); 1341 1342 if(filteredBI.isValid()) { 1343 logln("Testing:"); 1344 filteredBI->setText(text); 1345 TEST_ASSERT(84 == filteredBI->next()); 1346 TEST_ASSERT(278 == filteredBI->next()); 1347 filteredBI->first(); 1348 prtbrks(filteredBI.getAlias(), text, *this); 1349 } 1350 } 1351 } 1352 1353 // reenable once french is in 1354 { 1355 logln("Constructing French builder"); 1356 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status)); 1357 TEST_ASSERT_SUCCESS(status); 1358 1359 logln("Constructing base BI\n"); 1360 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status)); 1361 TEST_ASSERT_SUCCESS(status); 1362 1363 if (U_SUCCESS(status)) { 1364 logln("Building new BI\n"); 1365 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status)); 1366 TEST_ASSERT_SUCCESS(status); 1367 } 1368 1369 if(frenchBI.isValid()) { 1370 logln("Testing:"); 1371 UnicodeString frText("C'est MM. Duval."); 1372 frenchBI->setText(frText); 1373 TEST_ASSERT(16 == frenchBI->next()); 1374 TEST_ASSERT(BreakIterator::DONE == frenchBI->next()); 1375 frenchBI->first(); 1376 prtbrks(frenchBI.getAlias(), frText, *this); 1377 logln("Testing against English:"); 1378 filteredBI->setText(frText); 1379 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english. 1380 TEST_ASSERT(16 == filteredBI->next()); 1381 TEST_ASSERT(BreakIterator::DONE == filteredBI->next()); 1382 filteredBI->first(); 1383 prtbrks(filteredBI.getAlias(), frText, *this); 1384 1385 // Verify == 1386 TEST_ASSERT_TRUE(*frenchBI == *frenchBI); 1387 TEST_ASSERT_TRUE(*filteredBI != *frenchBI); 1388 TEST_ASSERT_TRUE(*frenchBI != *filteredBI); 1389 } else { 1390 dataerrln("French BI: not valid."); 1391 } 1392 } 1393 1394#else 1395 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION"); 1396#endif 1397} 1398 1399//--------------------------------------------- 1400// runIndexedTest 1401//--------------------------------------------- 1402 1403void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 1404{ 1405 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); 1406 TESTCASE_AUTO_BEGIN; 1407#if !UCONFIG_NO_FILE_IO 1408 TESTCASE_AUTO(TestCloneEquals); 1409 TESTCASE_AUTO(TestgetRules); 1410 TESTCASE_AUTO(TestHashCode); 1411 TESTCASE_AUTO(TestGetSetAdoptText); 1412 TESTCASE_AUTO(TestIteration); 1413#endif 1414 TESTCASE_AUTO(TestBuilder); 1415 TESTCASE_AUTO(TestQuoteGrouping); 1416 TESTCASE_AUTO(TestRuleStatusVec); 1417 TESTCASE_AUTO(TestBug2190); 1418#if !UCONFIG_NO_FILE_IO 1419 TESTCASE_AUTO(TestRegistration); 1420 TESTCASE_AUTO(TestBoilerPlate); 1421 TESTCASE_AUTO(TestRuleStatus); 1422 TESTCASE_AUTO(TestRoundtripRules); 1423 TESTCASE_AUTO(TestGetBinaryRules); 1424#endif 1425 TESTCASE_AUTO(TestRefreshInputText); 1426#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING 1427 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder); 1428#endif 1429 TESTCASE_AUTO_END; 1430} 1431 1432 1433//--------------------------------------------- 1434//Internal subroutines 1435//--------------------------------------------- 1436 1437void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){ 1438 logln((UnicodeString)"testIsBoundary():"); 1439 int32_t p = 0; 1440 UBool isB; 1441 for (int32_t i = 0; i < text.length(); i++) { 1442 isB = bi.isBoundary(i); 1443 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); 1444 1445 if (i == boundaries[p]) { 1446 if (!isB) 1447 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); 1448 p++; 1449 } 1450 else { 1451 if (isB) 1452 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); 1453 } 1454 } 1455} 1456void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ 1457 UnicodeString selected; 1458 UnicodeString expected=CharsToUnicodeString(expectedString); 1459 1460 if(gotoffset != expectedOffset) 1461 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); 1462 if(start <= gotoffset){ 1463 testString.extractBetween(start, gotoffset, selected); 1464 } 1465 else{ 1466 testString.extractBetween(gotoffset, start, selected); 1467 } 1468 if(selected.compare(expected) != 0) 1469 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); 1470 else 1471 logln(prettify("****selected \"" + selected + "\"")); 1472} 1473 1474#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1475