1//
2//  file:  repattrn.cpp
3//
4/*
5***************************************************************************
6*   Copyright (C) 2002-2015 International Business Machines Corporation   *
7*   and others. All rights reserved.                                      *
8***************************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15#include "unicode/regex.h"
16#include "unicode/uclean.h"
17#include "uassert.h"
18#include "uhash.h"
19#include "uvector.h"
20#include "uvectr32.h"
21#include "uvectr64.h"
22#include "regexcmp.h"
23#include "regeximp.h"
24#include "regexst.h"
25
26U_NAMESPACE_BEGIN
27
28//--------------------------------------------------------------------------
29//
30//    RegexPattern    Default Constructor
31//
32//--------------------------------------------------------------------------
33RegexPattern::RegexPattern() {
34    // Init all of this instances data.
35    init();
36}
37
38
39//--------------------------------------------------------------------------
40//
41//   Copy Constructor        Note:  This is a rather inefficient implementation,
42//                                  but it probably doesn't matter.
43//
44//--------------------------------------------------------------------------
45RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
46    init();
47    *this = other;
48}
49
50
51
52//--------------------------------------------------------------------------
53//
54//    Assignment Operator
55//
56//--------------------------------------------------------------------------
57RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
58    if (this == &other) {
59        // Source and destination are the same.  Don't do anything.
60        return *this;
61    }
62
63    // Clean out any previous contents of object being assigned to.
64    zap();
65
66    // Give target object a default initialization
67    init();
68
69    // Copy simple fields
70    fDeferredStatus   = other.fDeferredStatus;
71
72    if (U_FAILURE(fDeferredStatus)) {
73        return *this;
74    }
75
76    if (other.fPatternString == NULL) {
77        fPatternString = NULL;
78        fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
79    } else {
80        fPatternString = new UnicodeString(*(other.fPatternString));
81        if (fPatternString == NULL) {
82            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
83        } else {
84            fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
85        }
86    }
87    if (U_FAILURE(fDeferredStatus)) {
88        return *this;
89    }
90
91    fFlags            = other.fFlags;
92    fLiteralText      = other.fLiteralText;
93    fMinMatchLen      = other.fMinMatchLen;
94    fFrameSize        = other.fFrameSize;
95    fDataSize         = other.fDataSize;
96    fStaticSets       = other.fStaticSets;
97    fStaticSets8      = other.fStaticSets8;
98
99    fStartType        = other.fStartType;
100    fInitialStringIdx = other.fInitialStringIdx;
101    fInitialStringLen = other.fInitialStringLen;
102    *fInitialChars    = *other.fInitialChars;
103    fInitialChar      = other.fInitialChar;
104    *fInitialChars8   = *other.fInitialChars8;
105    fNeedsAltInput    = other.fNeedsAltInput;
106
107    //  Copy the pattern.  It's just values, nothing deep to copy.
108    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
109    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
110
111    //  Copy the Unicode Sets.
112    //    Could be made more efficient if the sets were reference counted and shared,
113    //    but I doubt that pattern copying will be particularly common.
114    //    Note:  init() already added an empty element zero to fSets
115    int32_t i;
116    int32_t  numSets = other.fSets->size();
117    fSets8 = new Regex8BitSet[numSets];
118    if (fSets8 == NULL) {
119    	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
120    	return *this;
121    }
122    for (i=1; i<numSets; i++) {
123        if (U_FAILURE(fDeferredStatus)) {
124            return *this;
125        }
126        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
127        UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
128        if (newSet == NULL) {
129            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
130            break;
131        }
132        fSets->addElement(newSet, fDeferredStatus);
133        fSets8[i] = other.fSets8[i];
134    }
135
136    // Copy the named capture group hash map.
137    int32_t hashPos = UHASH_FIRST;
138    while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
139        if (U_FAILURE(fDeferredStatus)) {
140            break;
141        }
142        const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
143        UnicodeString *key = new UnicodeString(*name);
144        int32_t val = hashEl->value.integer;
145        if (key == NULL) {
146            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
147        } else {
148            uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
149        }
150    }
151    return *this;
152}
153
154
155//--------------------------------------------------------------------------
156//
157//    init        Shared initialization for use by constructors.
158//                Bring an uninitialized RegexPattern up to a default state.
159//
160//--------------------------------------------------------------------------
161void RegexPattern::init() {
162    fFlags            = 0;
163    fCompiledPat      = 0;
164    fLiteralText.remove();
165    fSets             = NULL;
166    fSets8            = NULL;
167    fDeferredStatus   = U_ZERO_ERROR;
168    fMinMatchLen      = 0;
169    fFrameSize        = 0;
170    fDataSize         = 0;
171    fGroupMap         = NULL;
172    fStaticSets       = NULL;
173    fStaticSets8      = NULL;
174    fStartType        = START_NO_INFO;
175    fInitialStringIdx = 0;
176    fInitialStringLen = 0;
177    fInitialChars     = NULL;
178    fInitialChar      = 0;
179    fInitialChars8    = NULL;
180    fNeedsAltInput    = FALSE;
181    fNamedCaptureMap  = NULL;
182
183    fPattern          = NULL; // will be set later
184    fPatternString    = NULL; // may be set later
185    fCompiledPat      = new UVector64(fDeferredStatus);
186    fGroupMap         = new UVector32(fDeferredStatus);
187    fSets             = new UVector(fDeferredStatus);
188    fInitialChars     = new UnicodeSet;
189    fInitialChars8    = new Regex8BitSet;
190    fNamedCaptureMap  = uhash_open(uhash_hashUnicodeString,     // Key hash function
191                                   uhash_compareUnicodeString,  // Key comparator function
192                                   uhash_compareLong,           // Value comparator function
193                                   &fDeferredStatus);
194    if (U_FAILURE(fDeferredStatus)) {
195        return;
196    }
197    if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
198            fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
199        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
200        return;
201    }
202
203    // Slot zero of the vector of sets is reserved.  Fill it here.
204    fSets->addElement((int32_t)0, fDeferredStatus);
205
206    // fNamedCaptureMap owns its key strings, type (UnicodeString *)
207    uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
208}
209
210
211//--------------------------------------------------------------------------
212//
213//   zap            Delete everything owned by this RegexPattern.
214//
215//--------------------------------------------------------------------------
216void RegexPattern::zap() {
217    delete fCompiledPat;
218    fCompiledPat = NULL;
219    int i;
220    for (i=1; i<fSets->size(); i++) {
221        UnicodeSet *s;
222        s = (UnicodeSet *)fSets->elementAt(i);
223        if (s != NULL) {
224            delete s;
225        }
226    }
227    delete fSets;
228    fSets = NULL;
229    delete[] fSets8;
230    fSets8 = NULL;
231    delete fGroupMap;
232    fGroupMap = NULL;
233    delete fInitialChars;
234    fInitialChars = NULL;
235    delete fInitialChars8;
236    fInitialChars8 = NULL;
237    if (fPattern != NULL) {
238        utext_close(fPattern);
239        fPattern = NULL;
240    }
241    if (fPatternString != NULL) {
242        delete fPatternString;
243        fPatternString = NULL;
244    }
245    uhash_close(fNamedCaptureMap);
246    fNamedCaptureMap = NULL;
247}
248
249
250//--------------------------------------------------------------------------
251//
252//   Destructor
253//
254//--------------------------------------------------------------------------
255RegexPattern::~RegexPattern() {
256    zap();
257}
258
259
260//--------------------------------------------------------------------------
261//
262//   Clone
263//
264//--------------------------------------------------------------------------
265RegexPattern  *RegexPattern::clone() const {
266    RegexPattern  *copy = new RegexPattern(*this);
267    return copy;
268}
269
270
271//--------------------------------------------------------------------------
272//
//   operator ==   (comparison)    Consider two patterns to be == if the
274//                                 pattern strings and the flags are the same.
275//                                 Note that pattern strings with the same
276//                                 characters can still be considered different.
277//
278//--------------------------------------------------------------------------
279UBool   RegexPattern::operator ==(const RegexPattern &other) const {
280    if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
281        if (this->fPatternString != NULL && other.fPatternString != NULL) {
282            return *(this->fPatternString) == *(other.fPatternString);
283        } else if (this->fPattern == NULL) {
284            if (other.fPattern == NULL) {
285                return TRUE;
286            }
287        } else if (other.fPattern != NULL) {
288            UTEXT_SETNATIVEINDEX(this->fPattern, 0);
289            UTEXT_SETNATIVEINDEX(other.fPattern, 0);
290            return utext_equals(this->fPattern, other.fPattern);
291        }
292    }
293    return FALSE;
294}
295
296//---------------------------------------------------------------------
297//
298//   compile
299//
300//---------------------------------------------------------------------
301RegexPattern * U_EXPORT2
302RegexPattern::compile(const UnicodeString &regex,
303                      uint32_t             flags,
304                      UParseError          &pe,
305                      UErrorCode           &status)
306{
307    if (U_FAILURE(status)) {
308        return NULL;
309    }
310
311    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
312    UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
313    UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
314
315    if ((flags & ~allFlags) != 0) {
316        status = U_REGEX_INVALID_FLAG;
317        return NULL;
318    }
319
320    if ((flags & UREGEX_CANON_EQ) != 0) {
321        status = U_REGEX_UNIMPLEMENTED;
322        return NULL;
323    }
324
325    RegexPattern *This = new RegexPattern;
326    if (This == NULL) {
327        status = U_MEMORY_ALLOCATION_ERROR;
328        return NULL;
329    }
330    if (U_FAILURE(This->fDeferredStatus)) {
331        status = This->fDeferredStatus;
332        delete This;
333        return NULL;
334    }
335    This->fFlags = flags;
336
337    RegexCompile     compiler(This, status);
338    compiler.compile(regex, pe, status);
339
340    if (U_FAILURE(status)) {
341        delete This;
342        This = NULL;
343    }
344
345    return This;
346}
347
348
349//
350//   compile, UText mode
351//
352RegexPattern * U_EXPORT2
353RegexPattern::compile(UText                *regex,
354                      uint32_t             flags,
355                      UParseError          &pe,
356                      UErrorCode           &status)
357{
358    if (U_FAILURE(status)) {
359        return NULL;
360    }
361
362    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
363                              UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
364                              UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
365
366    if ((flags & ~allFlags) != 0) {
367        status = U_REGEX_INVALID_FLAG;
368        return NULL;
369    }
370
371    if ((flags & UREGEX_CANON_EQ) != 0) {
372        status = U_REGEX_UNIMPLEMENTED;
373        return NULL;
374    }
375
376    RegexPattern *This = new RegexPattern;
377    if (This == NULL) {
378        status = U_MEMORY_ALLOCATION_ERROR;
379        return NULL;
380    }
381    if (U_FAILURE(This->fDeferredStatus)) {
382        status = This->fDeferredStatus;
383        delete This;
384        return NULL;
385    }
386    This->fFlags = flags;
387
388    RegexCompile     compiler(This, status);
389    compiler.compile(regex, pe, status);
390
391    if (U_FAILURE(status)) {
392        delete This;
393        This = NULL;
394    }
395
396    return This;
397}
398
399//
400//   compile with default flags.
401//
402RegexPattern * U_EXPORT2
403RegexPattern::compile(const UnicodeString &regex,
404                      UParseError         &pe,
405                      UErrorCode          &err)
406{
407    return compile(regex, 0, pe, err);
408}
409
410
411//
412//   compile with default flags, UText mode
413//
414RegexPattern * U_EXPORT2
415RegexPattern::compile(UText               *regex,
416                      UParseError         &pe,
417                      UErrorCode          &err)
418{
419    return compile(regex, 0, pe, err);
420}
421
422
423//
424//   compile with no UParseErr parameter.
425//
426RegexPattern * U_EXPORT2
427RegexPattern::compile(const UnicodeString &regex,
428                      uint32_t             flags,
429                      UErrorCode          &err)
430{
431    UParseError pe;
432    return compile(regex, flags, pe, err);
433}
434
435
436//
437//   compile with no UParseErr parameter, UText mode
438//
439RegexPattern * U_EXPORT2
440RegexPattern::compile(UText                *regex,
441                      uint32_t             flags,
442                      UErrorCode           &err)
443{
444    UParseError pe;
445    return compile(regex, flags, pe, err);
446}
447
448
449//---------------------------------------------------------------------
450//
451//   flags
452//
453//---------------------------------------------------------------------
454uint32_t RegexPattern::flags() const {
455    return fFlags;
456}
457
458
459//---------------------------------------------------------------------
460//
461//   matcher(UnicodeString, err)
462//
463//---------------------------------------------------------------------
464RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
465                                    UErrorCode          &status)  const {
466    RegexMatcher    *retMatcher = matcher(status);
467    if (retMatcher != NULL) {
468        retMatcher->fDeferredStatus = status;
469        retMatcher->reset(input);
470    }
471    return retMatcher;
472}
473
474
475//---------------------------------------------------------------------
476//
477//   matcher(status)
478//
479//---------------------------------------------------------------------
480RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
481    RegexMatcher    *retMatcher = NULL;
482
483    if (U_FAILURE(status)) {
484        return NULL;
485    }
486    if (U_FAILURE(fDeferredStatus)) {
487        status = fDeferredStatus;
488        return NULL;
489    }
490
491    retMatcher = new RegexMatcher(this);
492    if (retMatcher == NULL) {
493        status = U_MEMORY_ALLOCATION_ERROR;
494        return NULL;
495    }
496    return retMatcher;
497}
498
499
500
501//---------------------------------------------------------------------
502//
503//   matches        Convenience function to test for a match, starting
504//                  with a pattern string and a data string.
505//
506//---------------------------------------------------------------------
507UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
508              const UnicodeString   &input,
509                    UParseError     &pe,
510                    UErrorCode      &status) {
511
512    if (U_FAILURE(status)) {return FALSE;}
513
514    UBool         retVal;
515    RegexPattern *pat     = NULL;
516    RegexMatcher *matcher = NULL;
517
518    pat     = RegexPattern::compile(regex, 0, pe, status);
519    matcher = pat->matcher(input, status);
520    retVal  = matcher->matches(status);
521
522    delete matcher;
523    delete pat;
524    return retVal;
525}
526
527
528//
529//   matches, UText mode
530//
531UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
532                    UText           *input,
533                    UParseError     &pe,
534                    UErrorCode      &status) {
535
536    if (U_FAILURE(status)) {return FALSE;}
537
538    UBool         retVal  = FALSE;
539    RegexPattern *pat     = NULL;
540    RegexMatcher *matcher = NULL;
541
542    pat     = RegexPattern::compile(regex, 0, pe, status);
543    matcher = pat->matcher(status);
544    if (U_SUCCESS(status)) {
545        matcher->reset(input);
546        retVal  = matcher->matches(status);
547    }
548
549    delete matcher;
550    delete pat;
551    return retVal;
552}
553
554
555
556
557
558//---------------------------------------------------------------------
559//
560//   pattern
561//
562//---------------------------------------------------------------------
563UnicodeString RegexPattern::pattern() const {
564    if (fPatternString != NULL) {
565        return *fPatternString;
566    } else if (fPattern == NULL) {
567        return UnicodeString();
568    } else {
569        UErrorCode status = U_ZERO_ERROR;
570        int64_t nativeLen = utext_nativeLength(fPattern);
571        int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
572        UnicodeString result;
573
574        status = U_ZERO_ERROR;
575        UChar *resultChars = result.getBuffer(len16);
576        utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
577        result.releaseBuffer(len16);
578
579        return result;
580    }
581}
582
583
584
585
586//---------------------------------------------------------------------
587//
588//   patternText
589//
590//---------------------------------------------------------------------
591UText *RegexPattern::patternText(UErrorCode      &status) const {
592    if (U_FAILURE(status)) {return NULL;}
593    status = U_ZERO_ERROR;
594
595    if (fPattern != NULL) {
596        return fPattern;
597    } else {
598        RegexStaticSets::initGlobals(&status);
599        return RegexStaticSets::gStaticSets->fEmptyText;
600    }
601}
602
603
604//--------------------------------------------------------------------------------
605//
606//  groupNumberFromName()
607//
608//--------------------------------------------------------------------------------
609int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
610    if (U_FAILURE(status)) {
611        return 0;
612    }
613
614    // No need to explicitly check for syntactically valid names.
615    // Invalid ones will never be in the map, and the lookup will fail.
616
617    int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
618    if (number == 0) {
619        status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
620    }
621    return number;
622}
623
624int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
625    if (U_FAILURE(status)) {
626        return 0;
627    }
628    UnicodeString name(groupName, nameLength, US_INV);
629    return groupNumberFromName(name, status);
630}
631
632
633//---------------------------------------------------------------------
634//
635//   split
636//
637//---------------------------------------------------------------------
638int32_t  RegexPattern::split(const UnicodeString &input,
639        UnicodeString    dest[],
640        int32_t          destCapacity,
641        UErrorCode      &status) const
642{
643    if (U_FAILURE(status)) {
644        return 0;
645    };
646
647    RegexMatcher  m(this);
648    int32_t r = 0;
649    // Check m's status to make sure all is ok.
650    if (U_SUCCESS(m.fDeferredStatus)) {
651    	r = m.split(input, dest, destCapacity, status);
652    }
653    return r;
654}
655
656//
657//   split, UText mode
658//
659int32_t  RegexPattern::split(UText *input,
660        UText           *dest[],
661        int32_t          destCapacity,
662        UErrorCode      &status) const
663{
664    if (U_FAILURE(status)) {
665        return 0;
666    };
667
668    RegexMatcher  m(this);
669    int32_t r = 0;
670    // Check m's status to make sure all is ok.
671    if (U_SUCCESS(m.fDeferredStatus)) {
672    	r = m.split(input, dest, destCapacity, status);
673    }
674    return r;
675}
676
677
678
679//---------------------------------------------------------------------
680//
681//   dump    Output the compiled form of the pattern.
682//           Debugging function only.
683//
684//---------------------------------------------------------------------
685void   RegexPattern::dumpOp(int32_t index) const {
686    (void)index;  // Suppress warnings in non-debug build.
687#if defined(REGEX_DEBUG)
688    static const char * const opNames[] = {URX_OPCODE_NAMES};
689    int32_t op          = fCompiledPat->elementAti(index);
690    int32_t val         = URX_VAL(op);
691    int32_t type        = URX_TYPE(op);
692    int32_t pinnedType  = type;
693    if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
694        pinnedType = 0;
695    }
696
697    printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
698    switch (type) {
699    case URX_NOP:
700    case URX_DOTANY:
701    case URX_DOTANY_ALL:
702    case URX_FAIL:
703    case URX_CARET:
704    case URX_DOLLAR:
705    case URX_BACKSLASH_G:
706    case URX_BACKSLASH_X:
707    case URX_END:
708    case URX_DOLLAR_M:
709    case URX_CARET_M:
710        // Types with no operand field of interest.
711        break;
712
713    case URX_RESERVED_OP:
714    case URX_START_CAPTURE:
715    case URX_END_CAPTURE:
716    case URX_STATE_SAVE:
717    case URX_JMP:
718    case URX_JMP_SAV:
719    case URX_JMP_SAV_X:
720    case URX_BACKSLASH_B:
721    case URX_BACKSLASH_BU:
722    case URX_BACKSLASH_D:
723    case URX_BACKSLASH_Z:
724    case URX_STRING_LEN:
725    case URX_CTR_INIT:
726    case URX_CTR_INIT_NG:
727    case URX_CTR_LOOP:
728    case URX_CTR_LOOP_NG:
729    case URX_RELOC_OPRND:
730    case URX_STO_SP:
731    case URX_LD_SP:
732    case URX_BACKREF:
733    case URX_STO_INP_LOC:
734    case URX_JMPX:
735    case URX_LA_START:
736    case URX_LA_END:
737    case URX_BACKREF_I:
738    case URX_LB_START:
739    case URX_LB_CONT:
740    case URX_LB_END:
741    case URX_LBN_CONT:
742    case URX_LBN_END:
743    case URX_LOOP_C:
744    case URX_LOOP_DOT_I:
745    case URX_BACKSLASH_H:
746    case URX_BACKSLASH_R:
747    case URX_BACKSLASH_V:
748        // types with an integer operand field.
749        printf("%d", val);
750        break;
751
752    case URX_ONECHAR:
753    case URX_ONECHAR_I:
754        printf("%c", val<256?val:'?');
755        break;
756
757    case URX_STRING:
758    case URX_STRING_I:
759        {
760            int32_t lengthOp       = fCompiledPat->elementAti(index+1);
761            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
762            int32_t length = URX_VAL(lengthOp);
763            int32_t i;
764            for (i=val; i<val+length; i++) {
765                UChar c = fLiteralText[i];
766                if (c < 32 || c >= 256) {c = '.';}
767                printf("%c", c);
768            }
769        }
770        break;
771
772    case URX_SETREF:
773    case URX_LOOP_SR_I:
774        {
775            UnicodeString s;
776            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
777            set->toPattern(s, TRUE);
778            for (int32_t i=0; i<s.length(); i++) {
779                printf("%c", s.charAt(i));
780            }
781        }
782        break;
783
784    case URX_STATIC_SETREF:
785    case URX_STAT_SETREF_N:
786        {
787            UnicodeString s;
788            if (val & URX_NEG_SET) {
789                printf("NOT ");
790                val &= ~URX_NEG_SET;
791            }
792            UnicodeSet *set = fStaticSets[val];
793            set->toPattern(s, TRUE);
794            for (int32_t i=0; i<s.length(); i++) {
795                printf("%c", s.charAt(i));
796            }
797        }
798        break;
799
800
801    default:
802        printf("??????");
803        break;
804    }
805    printf("\n");
806#endif
807}
808
809
810void RegexPattern::dumpPattern() const {
811#if defined(REGEX_DEBUG)
812    // TODO: This function assumes an ASCII based charset.
813    int      index;
814    int      i;
815
816    printf("Original Pattern:  ");
817    UChar32 c = utext_next32From(fPattern, 0);
818    while (c != U_SENTINEL) {
819        if (c<32 || c>256) {
820            c = '.';
821        }
822        printf("%c", c);
823
824        c = UTEXT_NEXT32(fPattern);
825    }
826    printf("\n");
827    printf("   Min Match Length:  %d\n", fMinMatchLen);
828    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
829    if (fStartType == START_STRING) {
830        printf("    Initial match string: \"");
831        for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
832            printf("%c", fLiteralText[i]);   // TODO:  non-printables, surrogates.
833        }
834        printf("\"\n");
835
836    } else if (fStartType == START_SET) {
837        int32_t numSetChars = fInitialChars->size();
838        if (numSetChars > 20) {
839            numSetChars = 20;
840        }
841        printf("     Match First Chars : ");
842        for (i=0; i<numSetChars; i++) {
843            UChar32 c = fInitialChars->charAt(i);
844            if (0x20<c && c <0x7e) {
845                printf("%c ", c);
846            } else {
847                printf("%#x ", c);
848            }
849        }
850        if (numSetChars < fInitialChars->size()) {
851            printf(" ...");
852        }
853        printf("\n");
854
855    } else if (fStartType == START_CHAR) {
856        printf("    First char of Match : ");
857        if (0x20 < fInitialChar && fInitialChar<0x7e) {
858                printf("%c\n", fInitialChar);
859            } else {
860                printf("%#x\n", fInitialChar);
861            }
862    }
863
864    printf("Named Capture Groups:\n");
865    if (uhash_count(fNamedCaptureMap) == 0) {
866        printf("   None\n");
867    } else {
868        int32_t pos = UHASH_FIRST;
869        const UHashElement *el = NULL;
870        while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
871            const UnicodeString *name = (const UnicodeString *)el->key.pointer;
872            char s[100];
873            name->extract(0, 99, s, sizeof(s), US_INV);  // capture group names are invariant.
874            int32_t number = el->value.integer;
875            printf("   %d\t%s\n", number, s);
876        }
877    }
878
879    printf("\nIndex   Binary     Type             Operand\n" \
880           "-------------------------------------------\n");
881    for (index = 0; index<fCompiledPat->size(); index++) {
882        dumpOp(index);
883    }
884    printf("\n\n");
885#endif
886}
887
888
889
890UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
891
892U_NAMESPACE_END
893#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
894