rbbiapts.cpp revision b13da9df870a61b11249bf741347908dbea0edd8
1/******************************************************************** 2 * Copyright (c) 1999-2007, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * Date Name Description 6 * 12/14/99 Madhu Creation. 7 * 01/12/2000 Madhu updated for changed API 8 ********************************************************************/ 9 10#include "unicode/utypes.h" 11 12#if !UCONFIG_NO_BREAK_ITERATION 13 14#include "unicode/uchar.h" 15#include "intltest.h" 16#include "unicode/rbbi.h" 17#include "unicode/schriter.h" 18#include "rbbiapts.h" 19#include "rbbidata.h" 20#include "cstring.h" 21#include "ubrkimpl.h" 22#include "unicode/ustring.h" 23#include "unicode/utext.h" 24 25/** 26 * API Test the RuleBasedBreakIterator class 27 */ 28 29 30#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ 31errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} 32 33#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ 34errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}} 35 36void RBBIAPITest::TestCloneEquals() 37{ 38 39 UErrorCode status=U_ZERO_ERROR; 40 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 41 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 42 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 43 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 44 if(U_FAILURE(status)){ 45 errln((UnicodeString)"FAIL : in construction"); 46 return; 47 } 48 49 50 UnicodeString testString="Testing word break iterators's clone() and equals()"; 51 bi1->setText(testString); 52 bi2->setText(testString); 53 biequal->setText(testString); 54 55 bi3->setText("hello"); 56 57 logln((UnicodeString)"Testing equals()"); 58 59 logln((UnicodeString)"Testing == and !="); 60 UBool b = (*bi1 != *biequal); 61 b |= *bi1 == *bi2; 62 b |= *bi1 == *bi3; 63 if (b) { 64 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); 65 } 66 67 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) 68 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); 69 70 71 // Quick test of RulesBasedBreakIterator assignment - 72 // Check that 73 // two different iterators are != 74 // they are == after assignment 75 // source and dest iterator produce the same next() after assignment. 76 // deleting one doesn't disable the other. 77 logln("Testing assignment"); 78 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); 79 if(U_FAILURE(status)){ 80 errln((UnicodeString)"FAIL : in construction"); 81 return; 82 } 83 84 RuleBasedBreakIterator biDefault, biDefault2; 85 if(U_FAILURE(status)){ 86 errln((UnicodeString)"FAIL : in construction of default iterator"); 87 return; 88 } 89 if (biDefault == *bix) { 90 errln((UnicodeString)"ERROR: iterators should not compare =="); 91 return; 92 } 93 if (biDefault != biDefault2) { 94 errln((UnicodeString)"ERROR: iterators should compare =="); 95 return; 96 } 97 98 99 UnicodeString HelloString("Hello Kitty"); 100 bix->setText(HelloString); 101 if (*bix == *bi2) { 102 errln(UnicodeString("ERROR: strings should not be equal before assignment.")); 103 } 104 *bix = *bi2; 105 if (*bix != *bi2) { 106 errln(UnicodeString("ERROR: strings should be equal before assignment.")); 107 } 108 109 int bixnext = bix->next(); 110 int bi2next = bi2->next(); 111 if (! (bixnext == bi2next && bixnext == 7)) { 112 errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); 113 } 114 delete bix; 115 if (bi2->next() != 8) { 116 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); 117 } 118 119 120 121 logln((UnicodeString)"Testing clone()"); 122 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); 123 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); 124 125 if(*bi1clone != *bi1 || *bi1clone != *biequal || 126 *bi1clone == *bi3 || *bi1clone == *bi2) 127 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); 128 129 if(*bi2clone == *bi1 || *bi2clone == *biequal || 130 *bi2clone == *bi3 || *bi2clone != *bi2) 131 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); 132 133 if(bi1->getText() != bi1clone->getText() || 134 bi2clone->getText() != bi2->getText() || 135 *bi2clone == *bi1clone ) 136 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); 137 138 delete bi1clone; 139 delete bi2clone; 140 delete bi1; 141 delete bi3; 142 delete bi2; 143 delete biequal; 144} 145 146void RBBIAPITest::TestBoilerPlate() 147{ 148 UErrorCode status = U_ZERO_ERROR; 149 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); 150 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); 151 if (U_FAILURE(status)) { 152 errln("Creation of break iterator failed %s", u_errorName(status)); 153 return; 154 } 155 if(*a!=*b){ 156 errln("Failed: boilerplate method operator!= does not return correct results"); 157 } 158 BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status); 159 if(a && c){ 160 if(*c==*a){ 161 errln("Failed: boilerplate method opertator== does not return correct results"); 162 } 163 }else{ 164 errln("creation of break iterator failed"); 165 } 166 delete a; 167 delete b; 168 delete c; 169} 170 171void RBBIAPITest::TestgetRules() 172{ 173 UErrorCode status=U_ZERO_ERROR; 174 175 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 176 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 177 if(U_FAILURE(status)){ 178 errln((UnicodeString)"FAIL: in construction"); 179 delete bi1; 180 delete bi2; 181 return; 182 } 183 184 185 186 logln((UnicodeString)"Testing toString()"); 187 188 bi1->setText((UnicodeString)"Hello there"); 189 190 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); 191 192 UnicodeString temp=bi1->getRules(); 193 UnicodeString temp2=bi2->getRules(); 194 UnicodeString temp3=bi3->getRules(); 195 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) 196 errln((UnicodeString)"ERROR: error in getRules() method"); 197 198 delete bi1; 199 delete bi2; 200 delete bi3; 201} 202void RBBIAPITest::TestHashCode() 203{ 204 UErrorCode status=U_ZERO_ERROR; 205 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 206 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 207 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 208 if(U_FAILURE(status)){ 209 errln((UnicodeString)"FAIL : in construction"); 210 delete bi1; 211 delete bi2; 212 delete bi3; 213 return; 214 } 215 216 217 logln((UnicodeString)"Testing hashCode()"); 218 219 bi1->setText((UnicodeString)"Hash code"); 220 bi2->setText((UnicodeString)"Hash code"); 221 bi3->setText((UnicodeString)"Hash code"); 222 223 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); 224 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); 225 226 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || 227 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) 228 errln((UnicodeString)"ERROR: identical objects have different hashcodes"); 229 230 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || 231 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) 232 errln((UnicodeString)"ERROR: different objects have same hashcodes"); 233 234 delete bi1clone; 235 delete bi2clone; 236 delete bi1; 237 delete bi2; 238 delete bi3; 239 240} 241void RBBIAPITest::TestGetSetAdoptText() 242{ 243 logln((UnicodeString)"Testing getText setText "); 244 UErrorCode status=U_ZERO_ERROR; 245 UnicodeString str1="first string."; 246 UnicodeString str2="Second string."; 247 RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 248 RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 249 if(U_FAILURE(status)){ 250 errln((UnicodeString)"FAIL : in construction"); 251 return; 252 } 253 254 255 CharacterIterator* text1= new StringCharacterIterator(str1); 256 CharacterIterator* text1Clone = text1->clone(); 257 CharacterIterator* text2= new StringCharacterIterator(str2); 258 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" 259 260 wordIter1->setText(str1); 261 CharacterIterator *tci = &wordIter1->getText(); 262 UnicodeString tstr; 263 tci->getText(tstr); 264 TEST_ASSERT(tstr == str1); 265 if(wordIter1->current() != 0) 266 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 267 268 wordIter1->next(2); 269 270 wordIter1->setText(str2); 271 if(wordIter1->current() != 0) 272 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); 273 274 275 charIter1->adoptText(text1Clone); 276 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); 277 tci = &wordIter1->getText(); 278 tci->getText(tstr); 279 TEST_ASSERT(tstr == str2); 280 tci = &charIter1->getText(); 281 tci->getText(tstr); 282 TEST_ASSERT(tstr == str1); 283 284 285 RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone(); 286 rb->adoptText(text1); 287 if(rb->getText() != *text1) 288 errln((UnicodeString)"ERROR:1 error in adoptText "); 289 rb->adoptText(text2); 290 if(rb->getText() != *text2) 291 errln((UnicodeString)"ERROR:2 error in adoptText "); 292 293 // Adopt where iterator range is less than the entire orignal source string. 294 // (With the change of the break engine to working with UText internally, 295 // CharacterIterators starting at positions other than zero are not supported) 296 rb->adoptText(text3); 297 TEST_ASSERT(rb->preceding(2) == 0); 298 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); 299 //if(rb->preceding(2) != 3) { 300 // errln((UnicodeString)"ERROR:3 error in adoptText "); 301 //} 302 //if(rb->following(11) != BreakIterator::DONE) { 303 // errln((UnicodeString)"ERROR:4 error in adoptText "); 304 //} 305 306 // UText API 307 // 308 // Quick test to see if UText is working at all. 309 // 310 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ 311 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ 312 // 012345678901 313 314 status = U_ZERO_ERROR; 315 UText *ut = utext_openUTF8(NULL, s1, -1, &status); 316 wordIter1->setText(ut, status); 317 TEST_ASSERT_SUCCESS(status); 318 319 int32_t pos; 320 pos = wordIter1->first(); 321 TEST_ASSERT(pos==0); 322 pos = wordIter1->next(); 323 TEST_ASSERT(pos==5); 324 pos = wordIter1->next(); 325 TEST_ASSERT(pos==6); 326 pos = wordIter1->next(); 327 TEST_ASSERT(pos==11); 328 pos = wordIter1->next(); 329 TEST_ASSERT(pos==UBRK_DONE); 330 331 status = U_ZERO_ERROR; 332 UText *ut2 = utext_openUTF8(NULL, s2, -1, &status); 333 TEST_ASSERT_SUCCESS(status); 334 wordIter1->setText(ut2, status); 335 TEST_ASSERT_SUCCESS(status); 336 337 pos = wordIter1->first(); 338 TEST_ASSERT(pos==0); 339 pos = wordIter1->next(); 340 TEST_ASSERT(pos==3); 341 pos = wordIter1->next(); 342 TEST_ASSERT(pos==4); 343 344 pos = wordIter1->last(); 345 TEST_ASSERT(pos==6); 346 pos = wordIter1->previous(); 347 TEST_ASSERT(pos==4); 348 pos = wordIter1->previous(); 349 TEST_ASSERT(pos==3); 350 pos = wordIter1->previous(); 351 TEST_ASSERT(pos==0); 352 pos = wordIter1->previous(); 353 TEST_ASSERT(pos==UBRK_DONE); 354 355 status = U_ZERO_ERROR; 356 UnicodeString sEmpty; 357 UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status); 358 wordIter1->getUText(gut2, status); 359 TEST_ASSERT_SUCCESS(status); 360 utext_close(gut2); 361 362 utext_close(ut); 363 utext_close(ut2); 364 365 delete wordIter1; 366 delete charIter1; 367 delete rb; 368 369 } 370 371 372void RBBIAPITest::TestIteration() 373{ 374 // This test just verifies that the API is present. 375 // Testing for correct operation of the break rules happens elsewhere. 376 377 UErrorCode status=U_ZERO_ERROR; 378 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 379 if (U_FAILURE(status) || bi == NULL) { 380 errln("Failure creating character break iterator. Status = %s", u_errorName(status)); 381 } 382 delete bi; 383 384 status=U_ZERO_ERROR; 385 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); 386 if (U_FAILURE(status) || bi == NULL) { 387 errln("Failure creating Word break iterator. Status = %s", u_errorName(status)); 388 } 389 delete bi; 390 391 status=U_ZERO_ERROR; 392 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); 393 if (U_FAILURE(status) || bi == NULL) { 394 errln("Failure creating Line break iterator. Status = %s", u_errorName(status)); 395 } 396 delete bi; 397 398 status=U_ZERO_ERROR; 399 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); 400 if (U_FAILURE(status) || bi == NULL) { 401 errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status)); 402 } 403 delete bi; 404 405 status=U_ZERO_ERROR; 406 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); 407 if (U_FAILURE(status) || bi == NULL) { 408 errln("Failure creating Title break iterator. Status = %s", u_errorName(status)); 409 } 410 delete bi; 411 412 status=U_ZERO_ERROR; 413 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); 414 if (U_FAILURE(status) || bi == NULL) { 415 errln("Failure creating character break iterator. Status = %s", u_errorName(status)); 416 return; // Skip the rest of these tests. 417 } 418 419 420 UnicodeString testString="0123456789"; 421 bi->setText(testString); 422 423 int32_t i; 424 i = bi->first(); 425 if (i != 0) { 426 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); 427 } 428 429 i = bi->last(); 430 if (i != 10) { 431 errln("Incorrect value from bi->last(). Expected 10, got %d", i); 432 } 433 434 // 435 // Previous 436 // 437 bi->last(); 438 i = bi->previous(); 439 if (i != 9) { 440 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); 441 } 442 443 444 bi->first(); 445 i = bi->previous(); 446 if (i != BreakIterator::DONE) { 447 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); 448 } 449 450 // 451 // next() 452 // 453 bi->first(); 454 i = bi->next(); 455 if (i != 1) { 456 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); 457 } 458 459 bi->last(); 460 i = bi->next(); 461 if (i != BreakIterator::DONE) { 462 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); 463 } 464 465 466 // 467 // current() 468 // 469 bi->first(); 470 i = bi->current(); 471 if (i != 0) { 472 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 473 } 474 475 bi->next(); 476 i = bi->current(); 477 if (i != 1) { 478 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); 479 } 480 481 bi->last(); 482 bi->next(); 483 i = bi->current(); 484 if (i != 10) { 485 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); 486 } 487 488 bi->first(); 489 bi->previous(); 490 i = bi->current(); 491 if (i != 0) { 492 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); 493 } 494 495 496 // 497 // Following() 498 // 499 i = bi->following(4); 500 if (i != 5) { 501 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); 502 } 503 504 i = bi->following(9); 505 if (i != 10) { 506 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); 507 } 508 509 i = bi->following(10); 510 if (i != BreakIterator::DONE) { 511 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); 512 } 513 514 515 // 516 // Preceding 517 // 518 i = bi->preceding(4); 519 if (i != 3) { 520 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); 521 } 522 523 i = bi->preceding(10); 524 if (i != 9) { 525 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); 526 } 527 528 i = bi->preceding(1); 529 if (i != 0) { 530 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); 531 } 532 533 i = bi->preceding(0); 534 if (i != BreakIterator::DONE) { 535 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); 536 } 537 538 539 // 540 // isBoundary() 541 // 542 bi->first(); 543 if (bi->isBoundary(3) != TRUE) { 544 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); 545 } 546 i = bi->current(); 547 if (i != 3) { 548 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); 549 } 550 551 552 if (bi->isBoundary(11) != FALSE) { 553 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); 554 } 555 i = bi->current(); 556 if (i != 10) { 557 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); 558 } 559 560 // 561 // next(n) 562 // 563 bi->first(); 564 i = bi->next(4); 565 if (i != 4) { 566 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); 567 } 568 569 i = bi->next(6); 570 if (i != 10) { 571 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); 572 } 573 574 bi->first(); 575 i = bi->next(11); 576 if (i != BreakIterator::DONE) { 577 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); 578 } 579 580 delete bi; 581 582} 583 584 585 586 587 588 589void RBBIAPITest::TestBuilder() { 590 UnicodeString rulesString1 = "$Letters = [:L:];\n" 591 "$Numbers = [:N:];\n" 592 "$Letters+;\n" 593 "$Numbers+;\n" 594 "[^$Letters $Numbers];\n" 595 "!.*;\n"; 596 UnicodeString testString1 = "abc123..abc"; 597 // 01234567890 598 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; 599 UErrorCode status=U_ZERO_ERROR; 600 UParseError parseError; 601 602 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 603 if(U_FAILURE(status)) { 604 errln("FAIL : in construction"); 605 } else { 606 bi->setText(testString1); 607 doBoundaryTest(*bi, testString1, bounds1); 608 } 609 delete bi; 610} 611 612 613// 614// TestQuoteGrouping 615// Single quotes within rules imply a grouping, so that a modifier 616// following the quoted text (* or +) applies to all of the quoted chars. 617// 618void RBBIAPITest::TestQuoteGrouping() { 619 UnicodeString rulesString1 = "#Here comes the rule...\n" 620 "'$@!'*;\n" // (\$\@\!)* 621 ".;\n"; 622 623 UnicodeString testString1 = "$@!$@!X$@!!X"; 624 // 0123456789012 625 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; 626 UErrorCode status=U_ZERO_ERROR; 627 UParseError parseError; 628 629 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 630 if(U_FAILURE(status)) { 631 errln("FAIL : in construction"); 632 } else { 633 bi->setText(testString1); 634 doBoundaryTest(*bi, testString1, bounds1); 635 } 636 delete bi; 637} 638 639// 640// TestRuleStatus 641// Test word break rule status constants. 642// 643void RBBIAPITest::TestRuleStatus() { 644 UChar str[30]; 645 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094", 646 // 012345678901234567 8 9 0 1 2 3 4 5 6 647 // Ideographic Katakana Hiragana 648 str, 30); 649 UnicodeString testString1(str); 650 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26}; 651 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, 652 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, 653 UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE, 654 UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA}; 655 656 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, 657 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, 658 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT, 659 UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; 660 661 UErrorCode status=U_ZERO_ERROR; 662 663 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); 664 if(U_FAILURE(status)) { 665 errln("FAIL : in construction"); 666 } else { 667 bi->setText(testString1); 668 // First test that the breaks are in the right spots. 669 doBoundaryTest(*bi, testString1, bounds1); 670 671 // Then go back and check tag values 672 int32_t i = 0; 673 int32_t pos, tag; 674 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { 675 if (pos != bounds1[i]) { 676 errln("FAIL: unexpected word break at postion %d", pos); 677 break; 678 } 679 tag = bi->getRuleStatus(); 680 if (tag < tag_lo[i] || tag >= tag_hi[i]) { 681 errln("FAIL: incorrect tag value %d at position %d", tag, pos); 682 break; 683 } 684 685 // Check that we get the same tag values from getRuleStatusVec() 686 int32_t vec[10]; 687 int t = bi->getRuleStatusVec(vec, 10, status); 688 TEST_ASSERT_SUCCESS(status); 689 TEST_ASSERT(t==1); 690 TEST_ASSERT(vec[0] == tag); 691 } 692 } 693 delete bi; 694 695 // Now test line break status. This test mostly is to confirm that the status constants 696 // are correctly declared in the header. 697 testString1 = "test line. \n"; 698 // break type s s h 699 700 bi = (RuleBasedBreakIterator *) 701 BreakIterator::createLineInstance(Locale::getEnglish(), status); 702 if(U_FAILURE(status)) { 703 errln("failed to create word break iterator."); 704 } else { 705 int32_t i = 0; 706 int32_t pos, tag; 707 UBool success; 708 709 bi->setText(testString1); 710 pos = bi->current(); 711 tag = bi->getRuleStatus(); 712 for (i=0; i<3; i++) { 713 switch (i) { 714 case 0: 715 success = pos==0 && tag==UBRK_LINE_SOFT; break; 716 case 1: 717 success = pos==5 && tag==UBRK_LINE_SOFT; break; 718 case 2: 719 success = pos==12 && tag==UBRK_LINE_HARD; break; 720 default: 721 success = FALSE; break; 722 } 723 if (success == FALSE) { 724 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", 725 i, pos, tag); 726 break; 727 } 728 pos = bi->next(); 729 tag = bi->getRuleStatus(); 730 } 731 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || 732 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || 733 UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) { 734 errln("UBRK_LINE_* constants from header are inconsistent."); 735 } 736 } 737 delete bi; 738 739} 740 741 742// 743// TestRuleStatusVec 744// Test the vector form of break rule status. 745// 746void RBBIAPITest::TestRuleStatusVec() { 747 UnicodeString rulesString = "[A-N]{100}; \n" 748 "[a-w]{200}; \n" 749 "[\\p{L}]{300}; \n" 750 "[\\p{N}]{400}; \n" 751 "[0-5]{500}; \n" 752 "!.*;\n"; 753 UnicodeString testString1 = "Aapz5?"; 754 int32_t statusVals[10]; 755 int32_t numStatuses; 756 int32_t pos; 757 758 UErrorCode status=U_ZERO_ERROR; 759 UParseError parseError; 760 761 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); 762 TEST_ASSERT_SUCCESS(status); 763 if (U_SUCCESS(status)) { 764 bi->setText(testString1); 765 766 // A 767 pos = bi->next(); 768 TEST_ASSERT(pos==1); 769 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 770 TEST_ASSERT_SUCCESS(status); 771 TEST_ASSERT(numStatuses == 2); 772 TEST_ASSERT(statusVals[0] == 100); 773 TEST_ASSERT(statusVals[1] == 300); 774 775 // a 776 pos = bi->next(); 777 TEST_ASSERT(pos==2); 778 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 779 TEST_ASSERT_SUCCESS(status); 780 TEST_ASSERT(numStatuses == 2); 781 TEST_ASSERT(statusVals[0] == 200); 782 TEST_ASSERT(statusVals[1] == 300); 783 784 // p 785 pos = bi->next(); 786 TEST_ASSERT(pos==3); 787 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 788 TEST_ASSERT_SUCCESS(status); 789 TEST_ASSERT(numStatuses == 2); 790 TEST_ASSERT(statusVals[0] == 200); 791 TEST_ASSERT(statusVals[1] == 300); 792 793 // z 794 pos = bi->next(); 795 TEST_ASSERT(pos==4); 796 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 797 TEST_ASSERT_SUCCESS(status); 798 TEST_ASSERT(numStatuses == 1); 799 TEST_ASSERT(statusVals[0] == 300); 800 801 // 5 802 pos = bi->next(); 803 TEST_ASSERT(pos==5); 804 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 805 TEST_ASSERT_SUCCESS(status); 806 TEST_ASSERT(numStatuses == 2); 807 TEST_ASSERT(statusVals[0] == 400); 808 TEST_ASSERT(statusVals[1] == 500); 809 810 // ? 811 pos = bi->next(); 812 TEST_ASSERT(pos==6); 813 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); 814 TEST_ASSERT_SUCCESS(status); 815 TEST_ASSERT(numStatuses == 1); 816 TEST_ASSERT(statusVals[0] == 0); 817 818 // 819 // Check buffer overflow error handling. Char == A 820 // 821 bi->first(); 822 pos = bi->next(); 823 TEST_ASSERT(pos==1); 824 memset(statusVals, -1, sizeof(statusVals)); 825 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); 826 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 827 TEST_ASSERT(numStatuses == 2); 828 TEST_ASSERT(statusVals[0] == -1); 829 830 status = U_ZERO_ERROR; 831 memset(statusVals, -1, sizeof(statusVals)); 832 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); 833 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); 834 TEST_ASSERT(numStatuses == 2); 835 TEST_ASSERT(statusVals[0] == 100); 836 TEST_ASSERT(statusVals[1] == -1); 837 838 status = U_ZERO_ERROR; 839 memset(statusVals, -1, sizeof(statusVals)); 840 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); 841 TEST_ASSERT_SUCCESS(status); 842 TEST_ASSERT(numStatuses == 2); 843 TEST_ASSERT(statusVals[0] == 100); 844 TEST_ASSERT(statusVals[1] == 300); 845 TEST_ASSERT(statusVals[2] == -1); 846 } 847 delete bi; 848 849} 850 851// 852// Bug 2190 Regression test. Builder crash on rule consisting of only a 853// $variable reference 854void RBBIAPITest::TestBug2190() { 855 UnicodeString rulesString1 = "$aaa = abcd;\n" 856 "$bbb = $aaa;\n" 857 "$bbb;\n"; 858 UnicodeString testString1 = "abcdabcd"; 859 // 01234567890 860 int32_t bounds1[] = {0, 4, 8}; 861 UErrorCode status=U_ZERO_ERROR; 862 UParseError parseError; 863 864 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); 865 if(U_FAILURE(status)) { 866 errln("FAIL : in construction"); 867 } else { 868 bi->setText(testString1); 869 doBoundaryTest(*bi, testString1, bounds1); 870 } 871 delete bi; 872} 873 874 875void RBBIAPITest::TestRegistration() { 876#if !UCONFIG_NO_SERVICE 877 UErrorCode status = U_ZERO_ERROR; 878 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); 879 880 // ok to not delete these if we exit because of error? 881 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); 882 BreakIterator* root_word = BreakIterator::createWordInstance("", status); 883 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); 884 885 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); 886 { 887 if (ja_word && *ja_word == *root_word) { 888 errln("japan not different from root"); 889 } 890 } 891 892 { 893 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); 894 UBool fail = TRUE; 895 if(result){ 896 fail = *result != *ja_word; 897 } 898 delete result; 899 if (fail) { 900 errln("bad result for xx_XX/word"); 901 } 902 } 903 904 { 905 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); 906 UBool fail = TRUE; 907 if(result){ 908 fail = *result != *ja_char; 909 } 910 delete result; 911 if (fail) { 912 errln("bad result for ja_JP/char"); 913 } 914 } 915 916 { 917 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); 918 UBool fail = TRUE; 919 if(result){ 920 fail = *result != *root_char; 921 } 922 delete result; 923 if (fail) { 924 errln("bad result for xx_XX/char"); 925 } 926 } 927 928 { 929 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 930 UBool found = FALSE; 931 const UnicodeString* p; 932 while ((p = avail->snext(status))) { 933 if (p->compare("xx") == 0) { 934 found = TRUE; 935 break; 936 } 937 } 938 delete avail; 939 if (!found) { 940 errln("did not find test locale"); 941 } 942 } 943 944 { 945 UBool unreg = BreakIterator::unregister(key, status); 946 if (!unreg) { 947 errln("unable to unregister"); 948 } 949 } 950 951 { 952 BreakIterator* result = BreakIterator::createWordInstance("en_US", status); 953 BreakIterator* root = BreakIterator::createWordInstance("", status); 954 UBool fail = TRUE; 955 if(root){ 956 fail = *root != *result; 957 } 958 delete root; 959 delete result; 960 if (fail) { 961 errln("did not get root break"); 962 } 963 } 964 965 { 966 StringEnumeration* avail = BreakIterator::getAvailableLocales(); 967 UBool found = FALSE; 968 const UnicodeString* p; 969 while ((p = avail->snext(status))) { 970 if (p->compare("xx") == 0) { 971 found = TRUE; 972 break; 973 } 974 } 975 delete avail; 976 if (found) { 977 errln("found test locale"); 978 } 979 } 980 981 { 982 int32_t count; 983 UBool foundLocale = FALSE; 984 const Locale *avail = BreakIterator::getAvailableLocales(count); 985 for (int i=0; i<count; i++) { 986 if (avail[i] == Locale::getEnglish()) { 987 foundLocale = TRUE; 988 break; 989 } 990 } 991 if (foundLocale == FALSE) { 992 errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); 993 } 994 } 995 996 997 // ja_word was adopted by factory 998 delete ja_char; 999 delete root_word; 1000 delete root_char; 1001#endif 1002} 1003 1004void RBBIAPITest::RoundtripRule(const char *dataFile) { 1005 UErrorCode status = U_ZERO_ERROR; 1006 UParseError parseError; 1007 parseError.line = 0; 1008 parseError.offset = 0; 1009 UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status); 1010 uint32_t length; 1011 const UChar *builtSource; 1012 const uint8_t *rbbiRules; 1013 const uint8_t *builtRules; 1014 1015 if (U_FAILURE(status)) { 1016 errln("Can't open \"%s\"", dataFile); 1017 return; 1018 } 1019 1020 builtRules = (const uint8_t *)udata_getMemory(data); 1021 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); 1022 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); 1023 if (U_FAILURE(status)) { 1024 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", 1025 u_errorName(status), parseError.line, parseError.offset); 1026 return; 1027 }; 1028 rbbiRules = brkItr->getBinaryRules(length); 1029 logln("Comparing \"%s\" len=%d", dataFile, length); 1030 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { 1031 errln("Built rules and rebuilt rules are different %s", dataFile); 1032 return; 1033 } 1034 delete brkItr; 1035 udata_close(data); 1036} 1037 1038void RBBIAPITest::TestRoundtripRules() { 1039 RoundtripRule("word"); 1040 RoundtripRule("title"); 1041 RoundtripRule("sent"); 1042 RoundtripRule("line"); 1043 RoundtripRule("char"); 1044 if (!quick) { 1045 RoundtripRule("word_ja"); 1046 RoundtripRule("word_POSIX"); 1047 } 1048} 1049 1050//--------------------------------------------- 1051// runIndexedTest 1052//--------------------------------------------- 1053 1054void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 1055{ 1056 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); 1057 switch (index) { 1058 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; 1059 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; 1060 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; 1061 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; 1062 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; 1063 case 4: name = "TestIteration"; if (exec) TestIteration(); break; 1064 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; 1065 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; 1066 case 7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; 1067 case 8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; 1068 case 9: name = "TestBug2190"; if (exec) TestBug2190(); break; 1069 case 10: name = "TestRegistration"; if (exec) TestRegistration(); break; 1070 case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; 1071 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; 1072 1073 default: name = ""; break; // needed to end loop 1074 } 1075} 1076 1077//--------------------------------------------- 1078//Internal subroutines 1079//--------------------------------------------- 1080 1081void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){ 1082 logln((UnicodeString)"testIsBoundary():"); 1083 int32_t p = 0; 1084 UBool isB; 1085 for (int32_t i = 0; i < text.length(); i++) { 1086 isB = bi.isBoundary(i); 1087 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); 1088 1089 if (i == boundaries[p]) { 1090 if (!isB) 1091 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); 1092 p++; 1093 } 1094 else { 1095 if (isB) 1096 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); 1097 } 1098 } 1099} 1100void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ 1101 UnicodeString selected; 1102 UnicodeString expected=CharsToUnicodeString(expectedString); 1103 1104 if(gotoffset != expectedOffset) 1105 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); 1106 if(start <= gotoffset){ 1107 testString.extractBetween(start, gotoffset, selected); 1108 } 1109 else{ 1110 testString.extractBetween(gotoffset, start, selected); 1111 } 1112 if(selected.compare(expected) != 0) 1113 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); 1114 else 1115 logln(prettify("****selected \"" + selected + "\"")); 1116} 1117 1118#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1119