1// 2// file: repattrn.cpp 3// 4/* 5*************************************************************************** 6* Copyright (C) 2002-2015 International Business Machines Corporation * 7* and others. All rights reserved. * 8*************************************************************************** 9*/ 10 11#include "unicode/utypes.h" 12 13#if !UCONFIG_NO_REGULAR_EXPRESSIONS 14 15#include "unicode/regex.h" 16#include "unicode/uclean.h" 17#include "uassert.h" 18#include "uhash.h" 19#include "uvector.h" 20#include "uvectr32.h" 21#include "uvectr64.h" 22#include "regexcmp.h" 23#include "regeximp.h" 24#include "regexst.h" 25 26U_NAMESPACE_BEGIN 27 28//-------------------------------------------------------------------------- 29// 30// RegexPattern Default Constructor 31// 32//-------------------------------------------------------------------------- 33RegexPattern::RegexPattern() { 34 // Init all of this instances data. 35 init(); 36} 37 38 39//-------------------------------------------------------------------------- 40// 41// Copy Constructor Note: This is a rather inefficient implementation, 42// but it probably doesn't matter. 43// 44//-------------------------------------------------------------------------- 45RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { 46 init(); 47 *this = other; 48} 49 50 51 52//-------------------------------------------------------------------------- 53// 54// Assignment Operator 55// 56//-------------------------------------------------------------------------- 57RegexPattern &RegexPattern::operator = (const RegexPattern &other) { 58 if (this == &other) { 59 // Source and destination are the same. Don't do anything. 60 return *this; 61 } 62 63 // Clean out any previous contents of object being assigned to. 64 zap(); 65 66 // Give target object a default initialization 67 init(); 68 69 // Copy simple fields 70 fDeferredStatus = other.fDeferredStatus; 71 72 if (U_FAILURE(fDeferredStatus)) { 73 return *this; 74 } 75 76 if (other.fPatternString == NULL) { 77 fPatternString = NULL; 78 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus); 79 } else { 80 fPatternString = new UnicodeString(*(other.fPatternString)); 81 if (fPatternString == NULL) { 82 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 83 } else { 84 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus); 85 } 86 } 87 if (U_FAILURE(fDeferredStatus)) { 88 return *this; 89 } 90 91 fFlags = other.fFlags; 92 fLiteralText = other.fLiteralText; 93 fMinMatchLen = other.fMinMatchLen; 94 fFrameSize = other.fFrameSize; 95 fDataSize = other.fDataSize; 96 fStaticSets = other.fStaticSets; 97 fStaticSets8 = other.fStaticSets8; 98 99 fStartType = other.fStartType; 100 fInitialStringIdx = other.fInitialStringIdx; 101 fInitialStringLen = other.fInitialStringLen; 102 *fInitialChars = *other.fInitialChars; 103 fInitialChar = other.fInitialChar; 104 *fInitialChars8 = *other.fInitialChars8; 105 fNeedsAltInput = other.fNeedsAltInput; 106 107 // Copy the pattern. It's just values, nothing deep to copy. 108 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); 109 fGroupMap->assign(*other.fGroupMap, fDeferredStatus); 110 111 // Copy the Unicode Sets. 112 // Could be made more efficient if the sets were reference counted and shared, 113 // but I doubt that pattern copying will be particularly common. 114 // Note: init() already added an empty element zero to fSets 115 int32_t i; 116 int32_t numSets = other.fSets->size(); 117 fSets8 = new Regex8BitSet[numSets]; 118 if (fSets8 == NULL) { 119 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 120 return *this; 121 } 122 for (i=1; i<numSets; i++) { 123 if (U_FAILURE(fDeferredStatus)) { 124 return *this; 125 } 126 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); 127 UnicodeSet *newSet = new UnicodeSet(*sourceSet); 128 if (newSet == NULL) { 129 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 130 break; 131 } 132 fSets->addElement(newSet, fDeferredStatus); 133 fSets8[i] = other.fSets8[i]; 134 } 135 136 // Copy the named capture group hash map. 137 int32_t hashPos = UHASH_FIRST; 138 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) { 139 if (U_FAILURE(fDeferredStatus)) { 140 break; 141 } 142 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer; 143 UnicodeString *key = new UnicodeString(*name); 144 int32_t val = hashEl->value.integer; 145 if (key == NULL) { 146 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 147 } else { 148 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus); 149 } 150 } 151 return *this; 152} 153 154 155//-------------------------------------------------------------------------- 156// 157// init Shared initialization for use by constructors. 158// Bring an uninitialized RegexPattern up to a default state. 159// 160//-------------------------------------------------------------------------- 161void RegexPattern::init() { 162 fFlags = 0; 163 fCompiledPat = 0; 164 fLiteralText.remove(); 165 fSets = NULL; 166 fSets8 = NULL; 167 fDeferredStatus = U_ZERO_ERROR; 168 fMinMatchLen = 0; 169 fFrameSize = 0; 170 fDataSize = 0; 171 fGroupMap = NULL; 172 fStaticSets = NULL; 173 fStaticSets8 = NULL; 174 fStartType = START_NO_INFO; 175 fInitialStringIdx = 0; 176 fInitialStringLen = 0; 177 fInitialChars = NULL; 178 fInitialChar = 0; 179 fInitialChars8 = NULL; 180 fNeedsAltInput = FALSE; 181 fNamedCaptureMap = NULL; 182 183 fPattern = NULL; // will be set later 184 fPatternString = NULL; // may be set later 185 fCompiledPat = new UVector64(fDeferredStatus); 186 fGroupMap = new UVector32(fDeferredStatus); 187 fSets = new UVector(fDeferredStatus); 188 fInitialChars = new UnicodeSet; 189 fInitialChars8 = new Regex8BitSet; 190 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function 191 uhash_compareUnicodeString, // Key comparator function 192 uhash_compareLong, // Value comparator function 193 &fDeferredStatus); 194 if (U_FAILURE(fDeferredStatus)) { 195 return; 196 } 197 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || 198 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) { 199 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 200 return; 201 } 202 203 // Slot zero of the vector of sets is reserved. Fill it here. 204 fSets->addElement((int32_t)0, fDeferredStatus); 205 206 // fNamedCaptureMap owns its key strings, type (UnicodeString *) 207 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject); 208} 209 210 211//-------------------------------------------------------------------------- 212// 213// zap Delete everything owned by this RegexPattern. 214// 215//-------------------------------------------------------------------------- 216void RegexPattern::zap() { 217 delete fCompiledPat; 218 fCompiledPat = NULL; 219 int i; 220 for (i=1; i<fSets->size(); i++) { 221 UnicodeSet *s; 222 s = (UnicodeSet *)fSets->elementAt(i); 223 if (s != NULL) { 224 delete s; 225 } 226 } 227 delete fSets; 228 fSets = NULL; 229 delete[] fSets8; 230 fSets8 = NULL; 231 delete fGroupMap; 232 fGroupMap = NULL; 233 delete fInitialChars; 234 fInitialChars = NULL; 235 delete fInitialChars8; 236 fInitialChars8 = NULL; 237 if (fPattern != NULL) { 238 utext_close(fPattern); 239 fPattern = NULL; 240 } 241 if (fPatternString != NULL) { 242 delete fPatternString; 243 fPatternString = NULL; 244 } 245 uhash_close(fNamedCaptureMap); 246 fNamedCaptureMap = NULL; 247} 248 249 250//-------------------------------------------------------------------------- 251// 252// Destructor 253// 254//-------------------------------------------------------------------------- 255RegexPattern::~RegexPattern() { 256 zap(); 257} 258 259 260//-------------------------------------------------------------------------- 261// 262// Clone 263// 264//-------------------------------------------------------------------------- 265RegexPattern *RegexPattern::clone() const { 266 RegexPattern *copy = new RegexPattern(*this); 267 return copy; 268} 269 270 271//-------------------------------------------------------------------------- 272// 273// operator == (comparison) Consider to patterns to be == if the 274// pattern strings and the flags are the same. 275// Note that pattern strings with the same 276// characters can still be considered different. 277// 278//-------------------------------------------------------------------------- 279UBool RegexPattern::operator ==(const RegexPattern &other) const { 280 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) { 281 if (this->fPatternString != NULL && other.fPatternString != NULL) { 282 return *(this->fPatternString) == *(other.fPatternString); 283 } else if (this->fPattern == NULL) { 284 if (other.fPattern == NULL) { 285 return TRUE; 286 } 287 } else if (other.fPattern != NULL) { 288 UTEXT_SETNATIVEINDEX(this->fPattern, 0); 289 UTEXT_SETNATIVEINDEX(other.fPattern, 0); 290 return utext_equals(this->fPattern, other.fPattern); 291 } 292 } 293 return FALSE; 294} 295 296//--------------------------------------------------------------------- 297// 298// compile 299// 300//--------------------------------------------------------------------- 301RegexPattern * U_EXPORT2 302RegexPattern::compile(const UnicodeString ®ex, 303 uint32_t flags, 304 UParseError &pe, 305 UErrorCode &status) 306{ 307 if (U_FAILURE(status)) { 308 return NULL; 309 } 310 311 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 312 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 313 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 314 315 if ((flags & ~allFlags) != 0) { 316 status = U_REGEX_INVALID_FLAG; 317 return NULL; 318 } 319 320 if ((flags & UREGEX_CANON_EQ) != 0) { 321 status = U_REGEX_UNIMPLEMENTED; 322 return NULL; 323 } 324 325 RegexPattern *This = new RegexPattern; 326 if (This == NULL) { 327 status = U_MEMORY_ALLOCATION_ERROR; 328 return NULL; 329 } 330 if (U_FAILURE(This->fDeferredStatus)) { 331 status = This->fDeferredStatus; 332 delete This; 333 return NULL; 334 } 335 This->fFlags = flags; 336 337 RegexCompile compiler(This, status); 338 compiler.compile(regex, pe, status); 339 340 if (U_FAILURE(status)) { 341 delete This; 342 This = NULL; 343 } 344 345 return This; 346} 347 348 349// 350// compile, UText mode 351// 352RegexPattern * U_EXPORT2 353RegexPattern::compile(UText *regex, 354 uint32_t flags, 355 UParseError &pe, 356 UErrorCode &status) 357{ 358 if (U_FAILURE(status)) { 359 return NULL; 360 } 361 362 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | 363 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | 364 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL; 365 366 if ((flags & ~allFlags) != 0) { 367 status = U_REGEX_INVALID_FLAG; 368 return NULL; 369 } 370 371 if ((flags & UREGEX_CANON_EQ) != 0) { 372 status = U_REGEX_UNIMPLEMENTED; 373 return NULL; 374 } 375 376 RegexPattern *This = new RegexPattern; 377 if (This == NULL) { 378 status = U_MEMORY_ALLOCATION_ERROR; 379 return NULL; 380 } 381 if (U_FAILURE(This->fDeferredStatus)) { 382 status = This->fDeferredStatus; 383 delete This; 384 return NULL; 385 } 386 This->fFlags = flags; 387 388 RegexCompile compiler(This, status); 389 compiler.compile(regex, pe, status); 390 391 if (U_FAILURE(status)) { 392 delete This; 393 This = NULL; 394 } 395 396 return This; 397} 398 399// 400// compile with default flags. 401// 402RegexPattern * U_EXPORT2 403RegexPattern::compile(const UnicodeString ®ex, 404 UParseError &pe, 405 UErrorCode &err) 406{ 407 return compile(regex, 0, pe, err); 408} 409 410 411// 412// compile with default flags, UText mode 413// 414RegexPattern * U_EXPORT2 415RegexPattern::compile(UText *regex, 416 UParseError &pe, 417 UErrorCode &err) 418{ 419 return compile(regex, 0, pe, err); 420} 421 422 423// 424// compile with no UParseErr parameter. 425// 426RegexPattern * U_EXPORT2 427RegexPattern::compile(const UnicodeString ®ex, 428 uint32_t flags, 429 UErrorCode &err) 430{ 431 UParseError pe; 432 return compile(regex, flags, pe, err); 433} 434 435 436// 437// compile with no UParseErr parameter, UText mode 438// 439RegexPattern * U_EXPORT2 440RegexPattern::compile(UText *regex, 441 uint32_t flags, 442 UErrorCode &err) 443{ 444 UParseError pe; 445 return compile(regex, flags, pe, err); 446} 447 448 449//--------------------------------------------------------------------- 450// 451// flags 452// 453//--------------------------------------------------------------------- 454uint32_t RegexPattern::flags() const { 455 return fFlags; 456} 457 458 459//--------------------------------------------------------------------- 460// 461// matcher(UnicodeString, err) 462// 463//--------------------------------------------------------------------- 464RegexMatcher *RegexPattern::matcher(const UnicodeString &input, 465 UErrorCode &status) const { 466 RegexMatcher *retMatcher = matcher(status); 467 if (retMatcher != NULL) { 468 retMatcher->fDeferredStatus = status; 469 retMatcher->reset(input); 470 } 471 return retMatcher; 472} 473 474 475//--------------------------------------------------------------------- 476// 477// matcher(status) 478// 479//--------------------------------------------------------------------- 480RegexMatcher *RegexPattern::matcher(UErrorCode &status) const { 481 RegexMatcher *retMatcher = NULL; 482 483 if (U_FAILURE(status)) { 484 return NULL; 485 } 486 if (U_FAILURE(fDeferredStatus)) { 487 status = fDeferredStatus; 488 return NULL; 489 } 490 491 retMatcher = new RegexMatcher(this); 492 if (retMatcher == NULL) { 493 status = U_MEMORY_ALLOCATION_ERROR; 494 return NULL; 495 } 496 return retMatcher; 497} 498 499 500 501//--------------------------------------------------------------------- 502// 503// matches Convenience function to test for a match, starting 504// with a pattern string and a data string. 505// 506//--------------------------------------------------------------------- 507UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, 508 const UnicodeString &input, 509 UParseError &pe, 510 UErrorCode &status) { 511 512 if (U_FAILURE(status)) {return FALSE;} 513 514 UBool retVal; 515 RegexPattern *pat = NULL; 516 RegexMatcher *matcher = NULL; 517 518 pat = RegexPattern::compile(regex, 0, pe, status); 519 matcher = pat->matcher(input, status); 520 retVal = matcher->matches(status); 521 522 delete matcher; 523 delete pat; 524 return retVal; 525} 526 527 528// 529// matches, UText mode 530// 531UBool U_EXPORT2 RegexPattern::matches(UText *regex, 532 UText *input, 533 UParseError &pe, 534 UErrorCode &status) { 535 536 if (U_FAILURE(status)) {return FALSE;} 537 538 UBool retVal = FALSE; 539 RegexPattern *pat = NULL; 540 RegexMatcher *matcher = NULL; 541 542 pat = RegexPattern::compile(regex, 0, pe, status); 543 matcher = pat->matcher(status); 544 if (U_SUCCESS(status)) { 545 matcher->reset(input); 546 retVal = matcher->matches(status); 547 } 548 549 delete matcher; 550 delete pat; 551 return retVal; 552} 553 554 555 556 557 558//--------------------------------------------------------------------- 559// 560// pattern 561// 562//--------------------------------------------------------------------- 563UnicodeString RegexPattern::pattern() const { 564 if (fPatternString != NULL) { 565 return *fPatternString; 566 } else if (fPattern == NULL) { 567 return UnicodeString(); 568 } else { 569 UErrorCode status = U_ZERO_ERROR; 570 int64_t nativeLen = utext_nativeLength(fPattern); 571 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error 572 UnicodeString result; 573 574 status = U_ZERO_ERROR; 575 UChar *resultChars = result.getBuffer(len16); 576 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning 577 result.releaseBuffer(len16); 578 579 return result; 580 } 581} 582 583 584 585 586//--------------------------------------------------------------------- 587// 588// patternText 589// 590//--------------------------------------------------------------------- 591UText *RegexPattern::patternText(UErrorCode &status) const { 592 if (U_FAILURE(status)) {return NULL;} 593 status = U_ZERO_ERROR; 594 595 if (fPattern != NULL) { 596 return fPattern; 597 } else { 598 RegexStaticSets::initGlobals(&status); 599 return RegexStaticSets::gStaticSets->fEmptyText; 600 } 601} 602 603 604//-------------------------------------------------------------------------------- 605// 606// groupNumberFromName() 607// 608//-------------------------------------------------------------------------------- 609int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const { 610 if (U_FAILURE(status)) { 611 return 0; 612 } 613 614 // No need to explicitly check for syntactically valid names. 615 // Invalid ones will never be in the map, and the lookup will fail. 616 617 int32_t number = uhash_geti(fNamedCaptureMap, &groupName); 618 if (number == 0) { 619 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 620 } 621 return number; 622} 623 624int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const { 625 if (U_FAILURE(status)) { 626 return 0; 627 } 628 UnicodeString name(groupName, nameLength, US_INV); 629 return groupNumberFromName(name, status); 630} 631 632 633//--------------------------------------------------------------------- 634// 635// split 636// 637//--------------------------------------------------------------------- 638int32_t RegexPattern::split(const UnicodeString &input, 639 UnicodeString dest[], 640 int32_t destCapacity, 641 UErrorCode &status) const 642{ 643 if (U_FAILURE(status)) { 644 return 0; 645 }; 646 647 RegexMatcher m(this); 648 int32_t r = 0; 649 // Check m's status to make sure all is ok. 650 if (U_SUCCESS(m.fDeferredStatus)) { 651 r = m.split(input, dest, destCapacity, status); 652 } 653 return r; 654} 655 656// 657// split, UText mode 658// 659int32_t RegexPattern::split(UText *input, 660 UText *dest[], 661 int32_t destCapacity, 662 UErrorCode &status) const 663{ 664 if (U_FAILURE(status)) { 665 return 0; 666 }; 667 668 RegexMatcher m(this); 669 int32_t r = 0; 670 // Check m's status to make sure all is ok. 671 if (U_SUCCESS(m.fDeferredStatus)) { 672 r = m.split(input, dest, destCapacity, status); 673 } 674 return r; 675} 676 677 678 679//--------------------------------------------------------------------- 680// 681// dump Output the compiled form of the pattern. 682// Debugging function only. 683// 684//--------------------------------------------------------------------- 685void RegexPattern::dumpOp(int32_t index) const { 686 (void)index; // Suppress warnings in non-debug build. 687#if defined(REGEX_DEBUG) 688 static const char * const opNames[] = {URX_OPCODE_NAMES}; 689 int32_t op = fCompiledPat->elementAti(index); 690 int32_t val = URX_VAL(op); 691 int32_t type = URX_TYPE(op); 692 int32_t pinnedType = type; 693 if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) { 694 pinnedType = 0; 695 } 696 697 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]); 698 switch (type) { 699 case URX_NOP: 700 case URX_DOTANY: 701 case URX_DOTANY_ALL: 702 case URX_FAIL: 703 case URX_CARET: 704 case URX_DOLLAR: 705 case URX_BACKSLASH_G: 706 case URX_BACKSLASH_X: 707 case URX_END: 708 case URX_DOLLAR_M: 709 case URX_CARET_M: 710 // Types with no operand field of interest. 711 break; 712 713 case URX_RESERVED_OP: 714 case URX_START_CAPTURE: 715 case URX_END_CAPTURE: 716 case URX_STATE_SAVE: 717 case URX_JMP: 718 case URX_JMP_SAV: 719 case URX_JMP_SAV_X: 720 case URX_BACKSLASH_B: 721 case URX_BACKSLASH_BU: 722 case URX_BACKSLASH_D: 723 case URX_BACKSLASH_Z: 724 case URX_STRING_LEN: 725 case URX_CTR_INIT: 726 case URX_CTR_INIT_NG: 727 case URX_CTR_LOOP: 728 case URX_CTR_LOOP_NG: 729 case URX_RELOC_OPRND: 730 case URX_STO_SP: 731 case URX_LD_SP: 732 case URX_BACKREF: 733 case URX_STO_INP_LOC: 734 case URX_JMPX: 735 case URX_LA_START: 736 case URX_LA_END: 737 case URX_BACKREF_I: 738 case URX_LB_START: 739 case URX_LB_CONT: 740 case URX_LB_END: 741 case URX_LBN_CONT: 742 case URX_LBN_END: 743 case URX_LOOP_C: 744 case URX_LOOP_DOT_I: 745 case URX_BACKSLASH_H: 746 case URX_BACKSLASH_R: 747 case URX_BACKSLASH_V: 748 // types with an integer operand field. 749 printf("%d", val); 750 break; 751 752 case URX_ONECHAR: 753 case URX_ONECHAR_I: 754 printf("%c", val<256?val:'?'); 755 break; 756 757 case URX_STRING: 758 case URX_STRING_I: 759 { 760 int32_t lengthOp = fCompiledPat->elementAti(index+1); 761 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN); 762 int32_t length = URX_VAL(lengthOp); 763 int32_t i; 764 for (i=val; i<val+length; i++) { 765 UChar c = fLiteralText[i]; 766 if (c < 32 || c >= 256) {c = '.';} 767 printf("%c", c); 768 } 769 } 770 break; 771 772 case URX_SETREF: 773 case URX_LOOP_SR_I: 774 { 775 UnicodeString s; 776 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val); 777 set->toPattern(s, TRUE); 778 for (int32_t i=0; i<s.length(); i++) { 779 printf("%c", s.charAt(i)); 780 } 781 } 782 break; 783 784 case URX_STATIC_SETREF: 785 case URX_STAT_SETREF_N: 786 { 787 UnicodeString s; 788 if (val & URX_NEG_SET) { 789 printf("NOT "); 790 val &= ~URX_NEG_SET; 791 } 792 UnicodeSet *set = fStaticSets[val]; 793 set->toPattern(s, TRUE); 794 for (int32_t i=0; i<s.length(); i++) { 795 printf("%c", s.charAt(i)); 796 } 797 } 798 break; 799 800 801 default: 802 printf("??????"); 803 break; 804 } 805 printf("\n"); 806#endif 807} 808 809 810void RegexPattern::dumpPattern() const { 811#if defined(REGEX_DEBUG) 812 // TODO: This function assumes an ASCII based charset. 813 int index; 814 int i; 815 816 printf("Original Pattern: "); 817 UChar32 c = utext_next32From(fPattern, 0); 818 while (c != U_SENTINEL) { 819 if (c<32 || c>256) { 820 c = '.'; 821 } 822 printf("%c", c); 823 824 c = UTEXT_NEXT32(fPattern); 825 } 826 printf("\n"); 827 printf(" Min Match Length: %d\n", fMinMatchLen); 828 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType)); 829 if (fStartType == START_STRING) { 830 printf(" Initial match string: \""); 831 for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) { 832 printf("%c", fLiteralText[i]); // TODO: non-printables, surrogates. 833 } 834 printf("\"\n"); 835 836 } else if (fStartType == START_SET) { 837 int32_t numSetChars = fInitialChars->size(); 838 if (numSetChars > 20) { 839 numSetChars = 20; 840 } 841 printf(" Match First Chars : "); 842 for (i=0; i<numSetChars; i++) { 843 UChar32 c = fInitialChars->charAt(i); 844 if (0x20<c && c <0x7e) { 845 printf("%c ", c); 846 } else { 847 printf("%#x ", c); 848 } 849 } 850 if (numSetChars < fInitialChars->size()) { 851 printf(" ..."); 852 } 853 printf("\n"); 854 855 } else if (fStartType == START_CHAR) { 856 printf(" First char of Match : "); 857 if (0x20 < fInitialChar && fInitialChar<0x7e) { 858 printf("%c\n", fInitialChar); 859 } else { 860 printf("%#x\n", fInitialChar); 861 } 862 } 863 864 printf("Named Capture Groups:\n"); 865 if (uhash_count(fNamedCaptureMap) == 0) { 866 printf(" None\n"); 867 } else { 868 int32_t pos = UHASH_FIRST; 869 const UHashElement *el = NULL; 870 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) { 871 const UnicodeString *name = (const UnicodeString *)el->key.pointer; 872 char s[100]; 873 name->extract(0, 99, s, sizeof(s), US_INV); // capture group names are invariant. 874 int32_t number = el->value.integer; 875 printf(" %d\t%s\n", number, s); 876 } 877 } 878 879 printf("\nIndex Binary Type Operand\n" \ 880 "-------------------------------------------\n"); 881 for (index = 0; index<fCompiledPat->size(); index++) { 882 dumpOp(index); 883 } 884 printf("\n\n"); 885#endif 886} 887 888 889 890UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) 891 892U_NAMESPACE_END 893#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 894