1/******************************************************************** 2 * Copyright (c) 1999-2009, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * Date Name Description 6 * 12/14/99 Madhu Creation. 7 * 01/12/2000 Madhu updated for changed API 8 ********************************************************************/ 9 10#include "unicode/utypes.h" 11 12#if !UCONFIG_NO_BREAK_ITERATION 13 14#include "unicode/uchar.h" 15#include "intltest.h" 16#include "unicode/rbbi.h" 17#include "unicode/schriter.h" 18#include "rbbiapts.h" 19#include "rbbidata.h" 20#include "cstring.h" 21#include "ubrkimpl.h" 22#include "unicode/ustring.h" 23#include "unicode/utext.h" 24#include "cmemory.h" 25 26/** 27 * API Test the RuleBasedBreakIterator class 28 */ 29 30 31#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ 32errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} 33 34#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 35errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}} 36 37void RBBIAPITest::TestCloneEquals() 38{ 39 40 UErrorCode status=U_ZERO_ERROR; 41 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 42 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 43 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 44 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 45 if(U_FAILURE(status)){ 46 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 47 return; 48 } 49 50 51 UnicodeString testString="Testing word break iterators's clone() and equals()"; 52 bi1->setText(testString); 53 bi2->setText(testString); 54 biequal->setText(testString); 55 56 bi3->setText("hello"); 57 58 logln((UnicodeString)"Testing equals()"); 59 60 logln((UnicodeString)"Testing == and !="); 61 UBool b = (*bi1 != *biequal); 62 b |= *bi1 == *bi2; 63 b |= *bi1 == *bi3; 64 if (b) { 65 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); 66 } 67 68 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) 69 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); 70 71 72 // Quick test of RulesBasedBreakIterator assignment - 73 // Check that 74 // two different iterators are != 75 // they are == after assignment 76 // source and dest iterator produce the same next() after assignment. 77 // deleting one doesn't disable the other. 78 logln("Testing assignment"); 79 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 80 if(U_FAILURE(status)){ 81 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 82 return; 83 } 84 85 RuleBasedBreakIterator biDefault, biDefault2; 86 if(U_FAILURE(status)){ 87 errln((UnicodeString)"FAIL : in construction of default iterator"); 88 return; 89 } 90 if (biDefault == *bix) { 91 errln((UnicodeString)"ERROR: iterators should not compare =="); 92 return; 93 } 94 if (biDefault != biDefault2) { 95 errln((UnicodeString)"ERROR: iterators should compare =="); 96 return; 97 } 98 99 100 UnicodeString HelloString("Hello Kitty"); 101 bix->setText(HelloString); 102 if (*bix == *bi2) { 103 errln(UnicodeString("ERROR: strings should not be equal before assignment.")); 104 } 105 *bix = *bi2; 106 if (*bix != *bi2) { 107 errln(UnicodeString("ERROR: strings should be equal before assignment.")); 108 } 109 110 int bixnext = bix->next(); 111 int bi2next = bi2->next(); 112 if (! (bixnext == bi2next && bixnext == 7)) { 113 errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); 114 } 115 delete bix; 116 if (bi2->next() != 8) { 117 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); 118 } 119 120 121 122 logln((UnicodeString)"Testing clone()"); 123 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); 124 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); 125 126 if(*bi1clone != *bi1 || *bi1clone != *biequal || 127 *bi1clone == *bi3 || *bi1clone == *bi2) 128 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); 129 130 if(*bi2clone == *bi1 || *bi2clone == *biequal || 131 *bi2clone == *bi3 || *bi2clone != *bi2) 132 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); 133 134 if(bi1->getText() != bi1clone->getText() || 135 bi2clone->getText() != bi2->getText() || 136 *bi2clone == *bi1clone ) 137 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); 138 139 delete bi1clone; 140 delete bi2clone; 141 delete bi1; 142 delete bi3; 143 delete bi2; 144 delete biequal; 145} 146 147void RBBIAPITest::TestBoilerPlate() 148{ 149 UErrorCode status = U_ZERO_ERROR; 150 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); 151 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); 152 if (U_FAILURE(status)) { 153 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); 154 return; 155 } 156 if(*a!=*b){ 157 errln("Failed: boilerplate method operator!= does not return correct results"); 158 } 159 // Japanese word break iteratos is identical to root with 160 // a dictionary-based break iterator, but Thai character break iterator 161 // is still different from Root. 162 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status); 163 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("th"),status); 164 if(c && d){ 165 if(*c==*d){ 166 errln("Failed: boilerplate method opertator== does not return correct results"); 167 } 168 }else{ 169 errln("creation of break iterator failed"); 170 } 171 delete a; 172 delete b; 173 delete c; 174 delete d; 175} 176 177void RBBIAPITest::TestgetRules() 178{ 179 UErrorCode status=U_ZERO_ERROR; 180 181 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 182 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 183 if(U_FAILURE(status)){ 184 errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); 185 delete bi1; 186 delete bi2; 187 return; 188 } 189 190 191 192 logln((UnicodeString)"Testing toString()"); 193 194 bi1->setText((UnicodeString)"Hello there"); 195 196 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); 197 198 UnicodeString temp=bi1->getRules(); 199 UnicodeString temp2=bi2->getRules(); 200 UnicodeString temp3=bi3->getRules(); 201 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) 202 errln((UnicodeString)"ERROR: error in getRules() method"); 203 204 delete bi1; 205 delete bi2; 206 delete bi3; 207} 208void RBBIAPITest::TestHashCode() 209{ 210 UErrorCode status=U_ZERO_ERROR; 211 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 212 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 213 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 214 if(U_FAILURE(status)){ 215 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 216 delete bi1; 217 delete bi2; 218 delete bi3; 219 return; 220 } 221 222 223 logln((UnicodeString)"Testing hashCode()"); 224 225 bi1->setText((UnicodeString)"Hash code"); 226 bi2->setText((UnicodeString)"Hash code"); 227 bi3->setText((UnicodeString)"Hash code"); 228 229 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); 230 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); 231 232 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || 233 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) 234 errln((UnicodeString)"ERROR: identical objects have different hashcodes"); 235 236 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || 237 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) 238 errln((UnicodeString)"ERROR: different objects have same hashcodes"); 239 240 delete bi1clone; 241 delete bi2clone; 242 delete bi1; 243 delete bi2; 244 delete bi3; 245 246} 247void RBBIAPITest::TestGetSetAdoptText() 248{ 249 logln((UnicodeString)"Testing getText setText "); 250 UErrorCode status=U_ZERO_ERROR; 251 UnicodeString str1="first string."; 252 UnicodeString str2="Second string."; 253 RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 254 RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 255 if(U_FAILURE(status)){ 256 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 257 return; 258 } 259 260 261 CharacterIterator* text1= new StringCharacterIterator(str1); 262 CharacterIterator* text1Clone = text1->clone(); 263 CharacterIterator* text2= new StringCharacterIterator(str2); 264 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" 265 266 wordIter1->setText(str1); 267 CharacterIterator *tci = &wordIter1->getText(); 268 UnicodeString tstr; 269 tci->getText(tstr); 270 TEST_ASSERT(tstr == str1); 271 if(wordIter1->current() != 0) 272 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 273 274 wordIter1->next(2); 275 276 wordIter1->setText(str2); 277 if(wordIter1->current() != 0) 278 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 279 280 281 charIter1->adoptText(text1Clone); 282 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); 283 tci = &wordIter1->getText(); 284 tci->getText(tstr); 285 TEST_ASSERT(tstr == str2); 286 tci = &charIter1->getText(); 287 tci->getText(tstr); 288 TEST_ASSERT(tstr == str1); 289 290 291 RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone(); 292 rb->adoptText(text1); 293 if(rb->getText() != *text1) 294 errln((UnicodeString)"ERROR:1 error in adoptText "); 295 rb->adoptText(text2); 296 if(rb->getText() != *text2) 297 errln((UnicodeString)"ERROR:2 error in adoptText "); 298 299 // Adopt where iterator range is less than the entire orignal source string. 300 // (With the change of the break engine to working with UText internally, 301 // CharacterIterators starting at positions other than zero are not supported) 302 rb->adoptText(text3); 303 TEST_ASSERT(rb->preceding(2) == 0); 304 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); 305 //if(rb->preceding(2) != 3) { 306 // errln((UnicodeString)"ERROR:3 error in adoptText "); 307 //} 308 //if(rb->following(11) != BreakIterator::DONE) { 309 // errln((UnicodeString)"ERROR:4 error in adoptText "); 310 //} 311 312 // UText API 313 // 314 // Quick test to see if UText is working at all. 315 // 316 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ 317 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ 318 // 012345678901 319 320 status = U_ZERO_ERROR; 321 UText *ut = utext_openUTF8(NULL, s1, -1, &status); 322 wordIter1->setText(ut, status); 323 TEST_ASSERT_SUCCESS(status); 324 325 int32_t pos; 326 pos = wordIter1->first(); 327 TEST_ASSERT(pos==0); 328 pos = wordIter1->next(); 329 TEST_ASSERT(pos==5); 330 pos = wordIter1->next(); 331 TEST_ASSERT(pos==6); 332 pos = wordIter1->next(); 333 TEST_ASSERT(pos==11); 334 pos = wordIter1->next(); 335 TEST_ASSERT(pos==UBRK_DONE); 336 337 status = U_ZERO_ERROR; 338 UText *ut2 = utext_openUTF8(NULL, s2, -1, &status); 339 TEST_ASSERT_SUCCESS(status); 340 wordIter1->setText(ut2, status); 341 TEST_ASSERT_SUCCESS(status); 342 343 pos = wordIter1->first(); 344 TEST_ASSERT(pos==0); 345 pos = wordIter1->next(); 346 TEST_ASSERT(pos==3); 347 pos = wordIter1->next(); 348 TEST_ASSERT(pos==4); 349 350 pos = wordIter1->last(); 351 TEST_ASSERT(pos==6); 352 pos = wordIter1->previous(); 353 TEST_ASSERT(pos==4); 354 pos = wordIter1->previous(); 355 TEST_ASSERT(pos==3); 356 pos = wordIter1->previous(); 357 TEST_ASSERT(pos==0); 358 pos = wordIter1->previous(); 359 TEST_ASSERT(pos==UBRK_DONE); 360 361 status = U_ZERO_ERROR; 362 UnicodeString sEmpty; 363 UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status); 364 wordIter1->getUText(gut2, status); 365 TEST_ASSERT_SUCCESS(status); 366 utext_close(gut2); 367 368 utext_close(ut); 369 utext_close(ut2); 370 371 delete wordIter1; 372 delete charIter1; 373 delete rb; 374 375 } 376 377 378void RBBIAPITest::TestIteration() 379{ 380 // This test just verifies that the API is present. 381 // Testing for correct operation of the break rules happens elsewhere. 382 383 UErrorCode status=U_ZERO_ERROR; 384 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 385 if (U_FAILURE(status) || bi == NULL) { 386 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 387 } 388 delete bi; 389 390 status=U_ZERO_ERROR; 391 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 392 if (U_FAILURE(status) || bi == NULL) { 393 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); 394 } 395 delete bi; 396 397 status=U_ZERO_ERROR; 398 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); 399 if (U_FAILURE(status) || bi == NULL) { 400 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); 401 } 402 delete bi; 403 404 status=U_ZERO_ERROR; 405 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); 406 if (U_FAILURE(status) || bi == NULL) { 407 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); 408 } 409 delete bi; 410 411 status=U_ZERO_ERROR; 412 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); 413 if (U_FAILURE(status) || bi == NULL) { 414 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); 415 } 416 delete bi; 417 418 status=U_ZERO_ERROR; 419 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 420 if (U_FAILURE(status) || bi == NULL) { 421 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); 422 return; // Skip the rest of these tests. 423 } 424 425 426 UnicodeString testString="0123456789"; 427 bi->setText(testString); 428 429 int32_t i; 430 i = bi->first(); 431 if (i != 0) { 432 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); 433 } 434 435 i = bi->last(); 436 if (i != 10) { 437 errln("Incorrect value from bi->last(). Expected 10, got %d", i); 438 } 439 440 // 441 // Previous 442 // 443 bi->last(); 444 i = bi->previous(); 445 if (i != 9) { 446 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); 447 } 448 449 450 bi->first(); 451 i = bi->previous(); 452 if (i != BreakIterator::DONE) { 453 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); 454 } 455 456 // 457 // next() 458 // 459 bi->first(); 460 i = bi->next(); 461 if (i != 1) { 462 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); 463 } 464 465 bi->last(); 466 i = bi->next(); 467 if (i != BreakIterator::DONE) { 468 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); 469 } 470 471 472 // 473 // current() 474 // 475 bi->first(); 476 i = bi->current(); 477 if (i != 0) { 478 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 479 } 480 481 bi->next(); 482 i = bi->current(); 483 if (i != 1) { 484 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); 485 } 486 487 bi->last(); 488 bi->next(); 489 i = bi->current(); 490 if (i != 10) { 491 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); 492 } 493 494 bi->first(); 495 bi->previous(); 496 i = bi->current(); 497 if (i != 0) { 498 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 499 } 500 501 502 // 503 // Following() 504 // 505 i = bi->following(4); 506 if (i != 5) { 507 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); 508 } 509 510 i = bi->following(9); 511 if (i != 10) { 512 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); 513 } 514 515 i = bi->following(10); 516 if (i != BreakIterator::DONE) { 517 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); 518 } 519 520 521 // 522 // Preceding 523 // 524 i = bi->preceding(4); 525 if (i != 3) { 526 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); 527 } 528 529 i = bi->preceding(10); 530 if (i != 9) { 531 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); 532 } 533 534 i = bi->preceding(1); 535 if (i != 0) { 536 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); 537 } 538 539 i = bi->preceding(0); 540 if (i != BreakIterator::DONE) { 541 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); 542 } 543 544 545 // 546 // isBoundary() 547 // 548 bi->first(); 549 if (bi->isBoundary(3) != TRUE) { 550 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); 551 } 552 i = bi->current(); 553 if (i != 3) { 554 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); 555 } 556 557 558 if (bi->isBoundary(11) != FALSE) { 559 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); 560 } 561 i = bi->current(); 562 if (i != 10) { 563 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); 564 } 565 566 // 567 // next(n) 568 // 569 bi->first(); 570 i = bi->next(4); 571 if (i != 4) { 572 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); 573 } 574 575 i = bi->next(6); 576 if (i != 10) { 577 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); 578 } 579 580 bi->first(); 581 i = bi->next(11); 582 if (i != BreakIterator::DONE) { 583 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); 584 } 585 586 delete bi; 587 588} 589 590 591 592 593 594 595void RBBIAPITest::TestBuilder() { 596 UnicodeString rulesString1 = "$Letters = [:L:];\n" 597 "$Numbers = [:N:];\n" 598 "$Letters+;\n" 599 "$Numbers+;\n" 600 "[^$Letters $Numbers];\n" 601 "!.*;\n"; 602 UnicodeString testString1 = "abc123..abc"; 603 // 01234567890 604 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; 605 UErrorCode status=U_ZERO_ERROR; 606 UParseError parseError; 607 608 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 609 if(U_FAILURE(status)) { 610 dataerrln("Fail : in construction - %s", u_errorName(status)); 611 } else { 612 bi->setText(testString1); 613 doBoundaryTest(*bi, testString1, bounds1); 614 } 615 delete bi; 616} 617 618 619// 620// TestQuoteGrouping 621// Single quotes within rules imply a grouping, so that a modifier 622// following the quoted text (* or +) applies to all of the quoted chars. 623// 624void RBBIAPITest::TestQuoteGrouping() { 625 UnicodeString rulesString1 = "#Here comes the rule...\n" 626 "'$@!'*;\n" // (\$\@\!)* 627 ".;\n"; 628 629 UnicodeString testString1 = "$@!$@!X$@!!X"; 630 // 0123456789012 631 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; 632 UErrorCode status=U_ZERO_ERROR; 633 UParseError parseError; 634 635 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 636 if(U_FAILURE(status)) { 637 dataerrln("Fail : in construction - %s", u_errorName(status)); 638 } else { 639 bi->setText(testString1); 640 doBoundaryTest(*bi, testString1, bounds1); 641 } 642 delete bi; 643} 644 645// 646// TestRuleStatus 647// Test word break rule status constants. 648// 649void RBBIAPITest::TestRuleStatus() { 650 UChar str[30]; 651 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing 652 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO 653 u_unescape("plain word 123.45 \\u30a1\\u30a2 ", 654 // 012345678901234567 8 9 0 655 // Katakana 656 str, 30); 657 UnicodeString testString1(str); 658 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; 659 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, 660 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, 661 UBRK_WORD_IDEO, UBRK_WORD_NONE}; 662 663 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, 664 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, 665 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; 666 667 UErrorCode status=U_ZERO_ERROR; 668 669 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); 670 if(U_FAILURE(status)) { 671 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); 672 } else { 673 bi->setText(testString1); 674 // First test that the breaks are in the right spots. 675 doBoundaryTest(*bi, testString1, bounds1); 676 677 // Then go back and check tag values 678 int32_t i = 0; 679 int32_t pos, tag; 680 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { 681 if (pos != bounds1[i]) { 682 errln("FAIL: unexpected word break at postion %d", pos); 683 break; 684 } 685 tag = bi->getRuleStatus(); 686 if (tag < tag_lo[i] || tag >= tag_hi[i]) { 687 errln("FAIL: incorrect tag value %d at position %d", tag, pos); 688 break; 689 } 690 691 // Check that we get the same tag values from getRuleStatusVec() 692 int32_t vec[10]; 693 int t = bi->getRuleStatusVec(vec, 10, status); 694 TEST_ASSERT_SUCCESS(status); 695 TEST_ASSERT(t==1); 696 TEST_ASSERT(vec[0] == tag); 697 } 698 } 699 delete bi; 700 701 // Now test line break status. This test mostly is to confirm that the status constants 702 // are correctly declared in the header. 703 testString1 = "test line. \n"; 704 // break type s s h 705 706 bi = (RuleBasedBreakIterator *) 707 BreakIterator::createLineInstance(Locale::getEnglish(), status); 708 if(U_FAILURE(status)) { 709 errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); 710 } else { 711 int32_t i = 0; 712 int32_t pos, tag; 713 UBool success; 714 715 bi->setText(testString1); 716 pos = bi->current(); 717 tag = bi->getRuleStatus(); 718 for (i=0; i<3; i++) { 719 switch (i) { 720 case 0: 721 success = pos==0 && tag==UBRK_LINE_SOFT; break; 722 case 1: 723 success = pos==5 && tag==UBRK_LINE_SOFT; break; 724 case 2: 725 success = pos==12 && tag==UBRK_LINE_HARD; break; 726 default: 727 success = FALSE; break; 728 } 729 if (success == FALSE) { 730 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", 731 i, pos, tag); 732 break; 733 } 734 pos = bi->next(); 735 tag = bi->getRuleStatus(); 736 } 737 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || 738 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || 739 UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) { 740 errln("UBRK_LINE_* constants from header are inconsistent."); 741 } 742 } 743 delete bi; 744 745} 746 747 748// 749// TestRuleStatusVec 750// Test the vector form of break rule status. 751// 752void RBBIAPITest::TestRuleStatusVec() { 753 UnicodeString rulesString( "[A-N]{100}; \n" 754 "[a-w]{200}; \n" 755 "[\\p{L}]{300}; \n" 756 "[\\p{N}]{400}; \n" 757 "[0-5]{500}; \n" 758 "!.*;\n", -1, US_INV); 759 UnicodeString testString1 = "Aapz5?"; 760 int32_t statusVals[10]; 761 int32_t numStatuses; 762 int32_t pos; 763 764 UErrorCode status=U_ZERO_ERROR; 765 UParseError parseError; 766 767 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); 768 if (U_FAILURE(status)) { 769 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); 770 } else { 771 bi->setText(testString1); 772 773 // A 774 pos = bi->next(); 775 TEST_ASSERT(pos==1); 776 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 777 TEST_ASSERT_SUCCESS(status); 778 TEST_ASSERT(numStatuses == 2); 779 TEST_ASSERT(statusVals[0] == 100); 780 TEST_ASSERT(statusVals[1] == 300); 781 782 // a 783 pos = bi->next(); 784 TEST_ASSERT(pos==2); 785 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 786 TEST_ASSERT_SUCCESS(status); 787 TEST_ASSERT(numStatuses == 2); 788 TEST_ASSERT(statusVals[0] == 200); 789 TEST_ASSERT(statusVals[1] == 300); 790 791 // p 792 pos = bi->next(); 793 TEST_ASSERT(pos==3); 794 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 795 TEST_ASSERT_SUCCESS(status); 796 TEST_ASSERT(numStatuses == 2); 797 TEST_ASSERT(statusVals[0] == 200); 798 TEST_ASSERT(statusVals[1] == 300); 799 800 // z 801 pos = bi->next(); 802 TEST_ASSERT(pos==4); 803 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 804 TEST_ASSERT_SUCCESS(status); 805 TEST_ASSERT(numStatuses == 1); 806 TEST_ASSERT(statusVals[0] == 300); 807 808 // 5 809 pos = bi->next(); 810 TEST_ASSERT(pos==5); 811 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 812 TEST_ASSERT_SUCCESS(status); 813 TEST_ASSERT(numStatuses == 2); 814 TEST_ASSERT(statusVals[0] == 400); 815 TEST_ASSERT(statusVals[1] == 500); 816 817 // ? 818 pos = bi->next(); 819 TEST_ASSERT(pos==6); 820 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 821 TEST_ASSERT_SUCCESS(status); 822 TEST_ASSERT(numStatuses == 1); 823 TEST_ASSERT(statusVals[0] == 0); 824 825 // 826 // Check buffer overflow error handling. Char == A 827 // 828 bi->first(); 829 pos = bi->next(); 830 TEST_ASSERT(pos==1); 831 memset(statusVals, -1, sizeof(statusVals)); 832 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); 833 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 834 TEST_ASSERT(numStatuses == 2); 835 TEST_ASSERT(statusVals[0] == -1); 836 837 status = U_ZERO_ERROR; 838 memset(statusVals, -1, sizeof(statusVals)); 839 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); 840 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 841 TEST_ASSERT(numStatuses == 2); 842 TEST_ASSERT(statusVals[0] == 100); 843 TEST_ASSERT(statusVals[1] == -1); 844 845 status = U_ZERO_ERROR; 846 memset(statusVals, -1, sizeof(statusVals)); 847 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); 848 TEST_ASSERT_SUCCESS(status); 849 TEST_ASSERT(numStatuses == 2); 850 TEST_ASSERT(statusVals[0] == 100); 851 TEST_ASSERT(statusVals[1] == 300); 852 TEST_ASSERT(statusVals[2] == -1); 853 } 854 delete bi; 855 856} 857 858// 859// Bug 2190 Regression test. Builder crash on rule consisting of only a 860// $variable reference 861void RBBIAPITest::TestBug2190() { 862 UnicodeString rulesString1 = "$aaa = abcd;\n" 863 "$bbb = $aaa;\n" 864 "$bbb;\n"; 865 UnicodeString testString1 = "abcdabcd"; 866 // 01234567890 867 int32_t bounds1[] = {0, 4, 8}; 868 UErrorCode status=U_ZERO_ERROR; 869 UParseError parseError; 870 871 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 872 if(U_FAILURE(status)) { 873 dataerrln("Fail : in construction - %s", u_errorName(status)); 874 } else { 875 bi->setText(testString1); 876 doBoundaryTest(*bi, testString1, bounds1); 877 } 878 delete bi; 879} 880 881 882void RBBIAPITest::TestRegistration() { 883#if !UCONFIG_NO_SERVICE 884 UErrorCode status = U_ZERO_ERROR; 885 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); 886 887 // ok to not delete these if we exit because of error? 888 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); 889 BreakIterator* root_word = BreakIterator::createWordInstance("", status); 890 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); 891 892 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { 893 dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); 894 delete ja_word; 895 delete ja_char; 896 delete root_word; 897 delete root_char; 898 899 return; 900 } 901 902 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); 903 { 904#if 0 // With a dictionary based word breaking, ja_word is identical to root. 905 if (ja_word && *ja_word == *root_word) { 906 errln("japan not different from root"); 907 } 908#endif 909 } 910 911 { 912 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); 913 UBool fail = TRUE; 914 if(result){ 915 fail = *result != *ja_word; 916 } 917 delete result; 918 if (fail) { 919 errln("bad result for xx_XX/word"); 920 } 921 } 922 923 { 924 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); 925 UBool fail = TRUE; 926 if(result){ 927 fail = *result != *ja_char; 928 } 929 delete result; 930 if (fail) { 931 errln("bad result for ja_JP/char"); 932 } 933 } 934 935 { 936 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); 937 UBool fail = TRUE; 938 if(result){ 939 fail = *result != *root_char; 940 } 941 delete result; 942 if (fail) { 943 errln("bad result for xx_XX/char"); 944 } 945 } 946 947 { 948 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 949 UBool found = FALSE; 950 const UnicodeString* p; 951 while ((p = avail->snext(status))) { 952 if (p->compare("xx") == 0) { 953 found = TRUE; 954 break; 955 } 956 } 957 delete avail; 958 if (!found) { 959 errln("did not find test locale"); 960 } 961 } 962 963 { 964 UBool unreg = BreakIterator::unregister(key, status); 965 if (!unreg) { 966 errln("unable to unregister"); 967 } 968 } 969 970 { 971 BreakIterator* result = BreakIterator::createWordInstance("en_US", status); 972 BreakIterator* root = BreakIterator::createWordInstance("", status); 973 UBool fail = TRUE; 974 if(root){ 975 fail = *root != *result; 976 } 977 delete root; 978 delete result; 979 if (fail) { 980 errln("did not get root break"); 981 } 982 } 983 984 { 985 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 986 UBool found = FALSE; 987 const UnicodeString* p; 988 while ((p = avail->snext(status))) { 989 if (p->compare("xx") == 0) { 990 found = TRUE; 991 break; 992 } 993 } 994 delete avail; 995 if (found) { 996 errln("found test locale"); 997 } 998 } 999 1000 { 1001 int32_t count; 1002 UBool foundLocale = FALSE; 1003 const Locale *avail = BreakIterator::getAvailableLocales(count); 1004 for (int i=0; i<count; i++) { 1005 if (avail[i] == Locale::getEnglish()) { 1006 foundLocale = TRUE; 1007 break; 1008 } 1009 } 1010 if (foundLocale == FALSE) { 1011 errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); 1012 } 1013 } 1014 1015 1016 // ja_word was adopted by factory 1017 delete ja_char; 1018 delete root_word; 1019 delete root_char; 1020#endif 1021} 1022 1023void RBBIAPITest::RoundtripRule(const char *dataFile) { 1024 UErrorCode status = U_ZERO_ERROR; 1025 UParseError parseError; 1026 parseError.line = 0; 1027 parseError.offset = 0; 1028 UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status); 1029 uint32_t length; 1030 const UChar *builtSource; 1031 const uint8_t *rbbiRules; 1032 const uint8_t *builtRules; 1033 1034 if (U_FAILURE(status)) { 1035 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status)); 1036 return; 1037 } 1038 1039 builtRules = (const uint8_t *)udata_getMemory(data); 1040 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); 1041 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); 1042 if (U_FAILURE(status)) { 1043 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", 1044 u_errorName(status), parseError.line, parseError.offset); 1045 return; 1046 }; 1047 rbbiRules = brkItr->getBinaryRules(length); 1048 logln("Comparing \"%s\" len=%d", dataFile, length); 1049 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { 1050 errln("Built rules and rebuilt rules are different %s", dataFile); 1051 return; 1052 } 1053 delete brkItr; 1054 udata_close(data); 1055} 1056 1057void RBBIAPITest::TestRoundtripRules() { 1058 RoundtripRule("word"); 1059 RoundtripRule("title"); 1060 RoundtripRule("sent"); 1061 RoundtripRule("line"); 1062 RoundtripRule("char"); 1063 if (!quick) { 1064 RoundtripRule("word_ja"); 1065 RoundtripRule("word_POSIX"); 1066 } 1067} 1068 1069// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* 1070// (these are protected so we access them via a local class RBBIWithProtectedFunctions). 1071// This is just a sanity check, not a thorough test (e.g. we don't check that the 1072// first delete actually frees rulesCopy). 1073void RBBIAPITest::TestCreateFromRBBIData() { 1074 // Get some handy RBBIData 1075 const char *brkName = "word"; // or "sent", "line", "char", etc. 1076 UErrorCode status = U_ZERO_ERROR; 1077 UDataMemory * data = udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status); 1078 if ( U_SUCCESS(status) ) { 1079 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data); 1080 uint32_t length = builtRules->fLength; 1081 RBBIWithProtectedFunctions * brkItr; 1082 1083 // Try the memory-adopting constructor, need to copy the data first 1084 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); 1085 if ( rulesCopy ) { 1086 uprv_memcpy( rulesCopy, builtRules, length ); 1087 1088 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); 1089 if ( U_SUCCESS(status) ) { 1090 delete brkItr; // this should free rulesCopy 1091 } else { 1092 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) ); 1093 status = U_ZERO_ERROR;// reset for the next test 1094 uprv_free( rulesCopy ); 1095 } 1096 } 1097 1098 // Now try the non-adopting constructor 1099 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); 1100 if ( U_SUCCESS(status) ) { 1101 delete brkItr; // this should NOT attempt to free builtRules 1102 if (builtRules->fLength != length) { // sanity check 1103 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" ); 1104 } 1105 } else { 1106 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); 1107 } 1108 1109 udata_close(data); 1110 } 1111} 1112 1113//--------------------------------------------- 1114// runIndexedTest 1115//--------------------------------------------- 1116 1117void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 1118{ 1119 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); 1120 switch (index) { 1121 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; 1122 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; 1123 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; 1124 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; 1125 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; 1126 case 4: name = "TestIteration"; if (exec) TestIteration(); break; 1127 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; 1128 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; 1129 case 7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; 1130 case 8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; 1131 case 9: name = "TestBug2190"; if (exec) TestBug2190(); break; 1132 case 10: name = "TestRegistration"; if (exec) TestRegistration(); break; 1133 case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; 1134 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; 1135 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break; 1136 1137 default: name = ""; break; // needed to end loop 1138 } 1139} 1140 1141//--------------------------------------------- 1142//Internal subroutines 1143//--------------------------------------------- 1144 1145void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){ 1146 logln((UnicodeString)"testIsBoundary():"); 1147 int32_t p = 0; 1148 UBool isB; 1149 for (int32_t i = 0; i < text.length(); i++) { 1150 isB = bi.isBoundary(i); 1151 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); 1152 1153 if (i == boundaries[p]) { 1154 if (!isB) 1155 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); 1156 p++; 1157 } 1158 else { 1159 if (isB) 1160 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); 1161 } 1162 } 1163} 1164void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ 1165 UnicodeString selected; 1166 UnicodeString expected=CharsToUnicodeString(expectedString); 1167 1168 if(gotoffset != expectedOffset) 1169 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); 1170 if(start <= gotoffset){ 1171 testString.extractBetween(start, gotoffset, selected); 1172 } 1173 else{ 1174 testString.extractBetween(gotoffset, start, selected); 1175 } 1176 if(selected.compare(expected) != 0) 1177 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); 1178 else 1179 logln(prettify("****selected \"" + selected + "\"")); 1180} 1181 1182//--------------------------------------------- 1183//RBBIWithProtectedFunctions class functions 1184//--------------------------------------------- 1185 1186RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status) 1187 : RuleBasedBreakIterator(data, status) 1188{ 1189} 1190 1191RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) 1192 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) 1193{ 1194} 1195 1196#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1197