1//
2//  file:  repattrn.cpp
3//
4/*
5***************************************************************************
6*   Copyright (C) 2002-2010 International Business Machines Corporation   *
7*   and others. All rights reserved.                                      *
8***************************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15#include "unicode/regex.h"
16#include "unicode/uclean.h"
17#include "uassert.h"
18#include "uvector.h"
19#include "uvectr32.h"
20#include "uvectr64.h"
21#include "regexcmp.h"
22#include "regeximp.h"
23#include "regexst.h"
24
25U_NAMESPACE_BEGIN
26
27//--------------------------------------------------------------------------
28//
29//    RegexPattern    Default Constructor
30//
31//--------------------------------------------------------------------------
32RegexPattern::RegexPattern() {
33    UErrorCode status = U_ZERO_ERROR;
34    u_init(&status);
35
36    // Init all of this instances data.
37    init();
38}
39
40
41//--------------------------------------------------------------------------
42//
43//   Copy Constructor        Note:  This is a rather inefficient implementation,
44//                                  but it probably doesn't matter.
45//
46//--------------------------------------------------------------------------
47RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
48    init();
49    *this = other;
50}
51
52
53
54//--------------------------------------------------------------------------
55//
56//    Assignment Operator
57//
58//--------------------------------------------------------------------------
59RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
60    if (this == &other) {
61        // Source and destination are the same.  Don't do anything.
62        return *this;
63    }
64
65    // Clean out any previous contents of object being assigned to.
66    zap();
67
68    // Give target object a default initialization
69    init();
70
71    // Copy simple fields
72    if ( other.fPatternString == NULL ) {
73        fPatternString = NULL;
74        fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
75    } else {
76        fPatternString = new UnicodeString(*(other.fPatternString));
77        UErrorCode status = U_ZERO_ERROR;
78        fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
79        if (U_FAILURE(status)) {
80            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
81            return *this;
82        }
83    }
84    fFlags            = other.fFlags;
85    fLiteralText      = other.fLiteralText;
86    fDeferredStatus   = other.fDeferredStatus;
87    fMinMatchLen      = other.fMinMatchLen;
88    fFrameSize        = other.fFrameSize;
89    fDataSize         = other.fDataSize;
90    fMaxCaptureDigits = other.fMaxCaptureDigits;
91    fStaticSets       = other.fStaticSets;
92    fStaticSets8      = other.fStaticSets8;
93
94    fStartType        = other.fStartType;
95    fInitialStringIdx = other.fInitialStringIdx;
96    fInitialStringLen = other.fInitialStringLen;
97    *fInitialChars    = *other.fInitialChars;
98    fInitialChar      = other.fInitialChar;
99    *fInitialChars8   = *other.fInitialChars8;
100    fNeedsAltInput    = other.fNeedsAltInput;
101
102    //  Copy the pattern.  It's just values, nothing deep to copy.
103    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
104    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
105
106    //  Copy the Unicode Sets.
107    //    Could be made more efficient if the sets were reference counted and shared,
108    //    but I doubt that pattern copying will be particularly common.
109    //    Note:  init() already added an empty element zero to fSets
110    int32_t i;
111    int32_t  numSets = other.fSets->size();
112    fSets8 = new Regex8BitSet[numSets];
113    if (fSets8 == NULL) {
114    	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
115    	return *this;
116    }
117    for (i=1; i<numSets; i++) {
118        if (U_FAILURE(fDeferredStatus)) {
119            return *this;
120        }
121        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
122        UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
123        if (newSet == NULL) {
124            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
125            break;
126        }
127        fSets->addElement(newSet, fDeferredStatus);
128        fSets8[i] = other.fSets8[i];
129    }
130
131    return *this;
132}
133
134
135//--------------------------------------------------------------------------
136//
137//    init        Shared initialization for use by constructors.
138//                Bring an uninitialized RegexPattern up to a default state.
139//
140//--------------------------------------------------------------------------
141void RegexPattern::init() {
142    fFlags            = 0;
143    fCompiledPat      = 0;
144    fLiteralText.remove();
145    fSets             = NULL;
146    fSets8            = NULL;
147    fDeferredStatus   = U_ZERO_ERROR;
148    fMinMatchLen      = 0;
149    fFrameSize        = 0;
150    fDataSize         = 0;
151    fGroupMap         = NULL;
152    fMaxCaptureDigits = 1;
153    fStaticSets       = NULL;
154    fStaticSets8      = NULL;
155    fStartType        = START_NO_INFO;
156    fInitialStringIdx = 0;
157    fInitialStringLen = 0;
158    fInitialChars     = NULL;
159    fInitialChar      = 0;
160    fInitialChars8    = NULL;
161    fNeedsAltInput    = FALSE;
162
163    fPattern          = NULL; // will be set later
164    fPatternString    = NULL; // may be set later
165    fCompiledPat      = new UVector64(fDeferredStatus);
166    fGroupMap         = new UVector32(fDeferredStatus);
167    fSets             = new UVector(fDeferredStatus);
168    fInitialChars     = new UnicodeSet;
169    fInitialChars8    = new Regex8BitSet;
170    if (U_FAILURE(fDeferredStatus)) {
171        return;
172    }
173    if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
174        fInitialChars == NULL || fInitialChars8 == NULL) {
175        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
176        return;
177    }
178
179    // Slot zero of the vector of sets is reserved.  Fill it here.
180    fSets->addElement((int32_t)0, fDeferredStatus);
181}
182
183
184//--------------------------------------------------------------------------
185//
186//   zap            Delete everything owned by this RegexPattern.
187//
188//--------------------------------------------------------------------------
189void RegexPattern::zap() {
190    delete fCompiledPat;
191    fCompiledPat = NULL;
192    int i;
193    for (i=1; i<fSets->size(); i++) {
194        UnicodeSet *s;
195        s = (UnicodeSet *)fSets->elementAt(i);
196        if (s != NULL) {
197            delete s;
198        }
199    }
200    delete fSets;
201    fSets = NULL;
202    delete[] fSets8;
203    fSets8 = NULL;
204    delete fGroupMap;
205    fGroupMap = NULL;
206    delete fInitialChars;
207    fInitialChars = NULL;
208    delete fInitialChars8;
209    fInitialChars8 = NULL;
210    if (fPattern != NULL) {
211        utext_close(fPattern);
212        fPattern = NULL;
213    }
214    if (fPatternString != NULL) {
215        delete fPatternString;
216        fPatternString = NULL;
217    }
218}
219
220
221//--------------------------------------------------------------------------
222//
223//   Destructor
224//
225//--------------------------------------------------------------------------
226RegexPattern::~RegexPattern() {
227    zap();
228}
229
230
231//--------------------------------------------------------------------------
232//
233//   Clone
234//
235//--------------------------------------------------------------------------
236RegexPattern  *RegexPattern::clone() const {
237    RegexPattern  *copy = new RegexPattern(*this);
238    return copy;
239}
240
241
242//--------------------------------------------------------------------------
243//
//   operator ==   (comparison)    Consider two patterns to be == if the
245//                                 pattern strings and the flags are the same.
246//                                 Note that pattern strings with the same
247//                                 characters can still be considered different.
248//
249//--------------------------------------------------------------------------
250UBool   RegexPattern::operator ==(const RegexPattern &other) const {
251    if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
252        if (this->fPatternString != NULL && other.fPatternString != NULL) {
253            return *(this->fPatternString) == *(other.fPatternString);
254        } else if (this->fPattern == NULL) {
255            if (other.fPattern == NULL) {
256                return TRUE;
257            }
258        } else if (other.fPattern != NULL) {
259            UTEXT_SETNATIVEINDEX(this->fPattern, 0);
260            UTEXT_SETNATIVEINDEX(other.fPattern, 0);
261            return utext_equals(this->fPattern, other.fPattern);
262        }
263    }
264    return FALSE;
265}
266
267//---------------------------------------------------------------------
268//
269//   compile
270//
271//---------------------------------------------------------------------
272RegexPattern * U_EXPORT2
273RegexPattern::compile(const UnicodeString &regex,
274                      uint32_t             flags,
275                      UParseError          &pe,
276                      UErrorCode           &status)
277{
278    if (U_FAILURE(status)) {
279        return NULL;
280    }
281
282    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
283    UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
284    UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
285
286    if ((flags & ~allFlags) != 0) {
287        status = U_REGEX_INVALID_FLAG;
288        return NULL;
289    }
290
291    if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
292        status = U_REGEX_UNIMPLEMENTED;
293        return NULL;
294    }
295
296    RegexPattern *This = new RegexPattern;
297    if (This == NULL) {
298        status = U_MEMORY_ALLOCATION_ERROR;
299        return NULL;
300    }
301    if (U_FAILURE(This->fDeferredStatus)) {
302        status = This->fDeferredStatus;
303        delete This;
304        return NULL;
305    }
306    This->fFlags = flags;
307
308    RegexCompile     compiler(This, status);
309    compiler.compile(regex, pe, status);
310
311    if (U_FAILURE(status)) {
312        delete This;
313        This = NULL;
314    }
315
316    return This;
317}
318
319
320//
321//   compile, UText mode
322//
323RegexPattern * U_EXPORT2
324RegexPattern::compile(UText                *regex,
325                      uint32_t             flags,
326                      UParseError          &pe,
327                      UErrorCode           &status)
328{
329    if (U_FAILURE(status)) {
330        return NULL;
331    }
332
333    const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
334                              UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
335                              UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
336
337    if ((flags & ~allFlags) != 0) {
338        status = U_REGEX_INVALID_FLAG;
339        return NULL;
340    }
341
342    if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
343        status = U_REGEX_UNIMPLEMENTED;
344        return NULL;
345    }
346
347    RegexPattern *This = new RegexPattern;
348    if (This == NULL) {
349        status = U_MEMORY_ALLOCATION_ERROR;
350        return NULL;
351    }
352    if (U_FAILURE(This->fDeferredStatus)) {
353        status = This->fDeferredStatus;
354        delete This;
355        return NULL;
356    }
357    This->fFlags = flags;
358
359    RegexCompile     compiler(This, status);
360    compiler.compile(regex, pe, status);
361
362    if (U_FAILURE(status)) {
363        delete This;
364        This = NULL;
365    }
366
367    return This;
368}
369
370//
371//   compile with default flags.
372//
373RegexPattern * U_EXPORT2
374RegexPattern::compile(const UnicodeString &regex,
375                      UParseError         &pe,
376                      UErrorCode          &err)
377{
378    return compile(regex, 0, pe, err);
379}
380
381
382//
383//   compile with default flags, UText mode
384//
385RegexPattern * U_EXPORT2
386RegexPattern::compile(UText               *regex,
387                      UParseError         &pe,
388                      UErrorCode          &err)
389{
390    return compile(regex, 0, pe, err);
391}
392
393
394//
395//   compile with no UParseErr parameter.
396//
397RegexPattern * U_EXPORT2
398RegexPattern::compile(const UnicodeString &regex,
399                      uint32_t             flags,
400                      UErrorCode          &err)
401{
402    UParseError pe;
403    return compile(regex, flags, pe, err);
404}
405
406
407//
408//   compile with no UParseErr parameter, UText mode
409//
410RegexPattern * U_EXPORT2
411RegexPattern::compile(UText                *regex,
412                      uint32_t             flags,
413                      UErrorCode           &err)
414{
415    UParseError pe;
416    return compile(regex, flags, pe, err);
417}
418
419
420//---------------------------------------------------------------------
421//
422//   flags
423//
424//---------------------------------------------------------------------
425uint32_t RegexPattern::flags() const {
426    return fFlags;
427}
428
429
430//---------------------------------------------------------------------
431//
432//   matcher(UnicodeString, err)
433//
434//---------------------------------------------------------------------
435RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
436                                    UErrorCode          &status)  const {
437    RegexMatcher    *retMatcher = matcher(status);
438    if (retMatcher != NULL) {
439        retMatcher->fDeferredStatus = status;
440        retMatcher->reset(input);
441    }
442    return retMatcher;
443}
444
445//
446//   matcher, UText mode
447//
448RegexMatcher *RegexPattern::matcher(UText               *input,
449                                    PatternIsUTextFlag  /*flag*/,
450                                    UErrorCode          &status)  const {
451    RegexMatcher    *retMatcher = matcher(status);
452    if (retMatcher != NULL) {
453        retMatcher->fDeferredStatus = status;
454        retMatcher->reset(input);
455    }
456    return retMatcher;
457}
458
459#if 0
// Stub overload taking a raw UChar pointer.  This definition sits inside an
// "#if 0" region, so it is not compiled.  It ignores its input, changes
// status to U_UNSUPPORTED_ERROR unless status already holds a failure, and
// always returns NULL.
RegexMatcher *RegexPattern::matcher(const UChar * /*input*/,
                                    UErrorCode          &status)  const
{
    /* This should never get called. The API with UnicodeString should be called instead. */
    if (U_SUCCESS(status)) {
        status = U_UNSUPPORTED_ERROR;
    }
    return NULL;
}
469#endif
470
471//---------------------------------------------------------------------
472//
473//   matcher(status)
474//
475//---------------------------------------------------------------------
476RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
477    RegexMatcher    *retMatcher = NULL;
478
479    if (U_FAILURE(status)) {
480        return NULL;
481    }
482    if (U_FAILURE(fDeferredStatus)) {
483        status = fDeferredStatus;
484        return NULL;
485    }
486
487    retMatcher = new RegexMatcher(this);
488    if (retMatcher == NULL) {
489        status = U_MEMORY_ALLOCATION_ERROR;
490        return NULL;
491    }
492    return retMatcher;
493}
494
495
496
497//---------------------------------------------------------------------
498//
499//   matches        Convenience function to test for a match, starting
500//                  with a pattern string and a data string.
501//
502//---------------------------------------------------------------------
503UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
504              const UnicodeString   &input,
505                    UParseError     &pe,
506                    UErrorCode      &status) {
507
508    if (U_FAILURE(status)) {return FALSE;}
509
510    UBool         retVal;
511    RegexPattern *pat     = NULL;
512    RegexMatcher *matcher = NULL;
513
514    pat     = RegexPattern::compile(regex, 0, pe, status);
515    matcher = pat->matcher(input, status);
516    retVal  = matcher->matches(status);
517
518    delete matcher;
519    delete pat;
520    return retVal;
521}
522
523
524//
525//   matches, UText mode
526//
527UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
528                    UText           *input,
529                    UParseError     &pe,
530                    UErrorCode      &status) {
531
532    if (U_FAILURE(status)) {return FALSE;}
533
534    UBool         retVal;
535    RegexPattern *pat     = NULL;
536    RegexMatcher *matcher = NULL;
537
538    pat     = RegexPattern::compile(regex, 0, pe, status);
539    matcher = pat->matcher(input, PATTERN_IS_UTEXT, status);
540    retVal  = matcher->matches(status);
541
542    delete matcher;
543    delete pat;
544    return retVal;
545}
546
547
548
549
550
551//---------------------------------------------------------------------
552//
553//   pattern
554//
555//---------------------------------------------------------------------
556UnicodeString RegexPattern::pattern() const {
557    if (fPatternString != NULL) {
558        return *fPatternString;
559    } else if (fPattern == NULL) {
560        return UnicodeString();
561    } else {
562        UErrorCode status = U_ZERO_ERROR;
563        int64_t nativeLen = utext_nativeLength(fPattern);
564        int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
565        UnicodeString result;
566
567        status = U_ZERO_ERROR;
568        UChar *resultChars = result.getBuffer(len16);
569        utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
570        result.releaseBuffer(len16);
571
572        return result;
573    }
574}
575
576
577
578
579//---------------------------------------------------------------------
580//
581//   patternText
582//
583//---------------------------------------------------------------------
584UText *RegexPattern::patternText(UErrorCode      &status) const {
585    if (U_FAILURE(status)) {return NULL;}
586    status = U_ZERO_ERROR;
587
588    if (fPattern != NULL) {
589        return fPattern;
590    } else {
591        RegexStaticSets::initGlobals(&status);
592        return RegexStaticSets::gStaticSets->fEmptyText;
593    }
594}
595
596
597
598//---------------------------------------------------------------------
599//
600//   split
601//
602//---------------------------------------------------------------------
603int32_t  RegexPattern::split(const UnicodeString &input,
604        UnicodeString    dest[],
605        int32_t          destCapacity,
606        UErrorCode      &status) const
607{
608    if (U_FAILURE(status)) {
609        return 0;
610    };
611
612    RegexMatcher  m(this);
613    int32_t r = 0;
614    // Check m's status to make sure all is ok.
615    if (U_SUCCESS(m.fDeferredStatus)) {
616    	r = m.split(input, dest, destCapacity, status);
617    }
618    return r;
619}
620
621//
622//   split, UText mode
623//
624int32_t  RegexPattern::split(UText *input,
625        UText           *dest[],
626        int32_t          destCapacity,
627        UErrorCode      &status) const
628{
629    if (U_FAILURE(status)) {
630        return 0;
631    };
632
633    RegexMatcher  m(this);
634    int32_t r = 0;
635    // Check m's status to make sure all is ok.
636    if (U_SUCCESS(m.fDeferredStatus)) {
637    	r = m.split(input, dest, destCapacity, status);
638    }
639    return r;
640}
641
642
643
644//---------------------------------------------------------------------
645//
646//   dump    Output the compiled form of the pattern.
647//           Debugging function only.
648//
649//---------------------------------------------------------------------
650#if defined(REGEX_DEBUG)
651void   RegexPattern::dumpOp(int32_t index) const {
652    static const char * const opNames[] = {URX_OPCODE_NAMES};
653    int32_t op          = fCompiledPat->elementAti(index);
654    int32_t val         = URX_VAL(op);
655    int32_t type        = URX_TYPE(op);
656    int32_t pinnedType  = type;
657    if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
658        pinnedType = 0;
659    }
660
661    REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
662    switch (type) {
663    case URX_NOP:
664    case URX_DOTANY:
665    case URX_DOTANY_ALL:
666    case URX_FAIL:
667    case URX_CARET:
668    case URX_DOLLAR:
669    case URX_BACKSLASH_G:
670    case URX_BACKSLASH_X:
671    case URX_END:
672    case URX_DOLLAR_M:
673    case URX_CARET_M:
674        // Types with no operand field of interest.
675        break;
676
677    case URX_RESERVED_OP:
678    case URX_START_CAPTURE:
679    case URX_END_CAPTURE:
680    case URX_STATE_SAVE:
681    case URX_JMP:
682    case URX_JMP_SAV:
683    case URX_JMP_SAV_X:
684    case URX_BACKSLASH_B:
685    case URX_BACKSLASH_BU:
686    case URX_BACKSLASH_D:
687    case URX_BACKSLASH_Z:
688    case URX_STRING_LEN:
689    case URX_CTR_INIT:
690    case URX_CTR_INIT_NG:
691    case URX_CTR_LOOP:
692    case URX_CTR_LOOP_NG:
693    case URX_RELOC_OPRND:
694    case URX_STO_SP:
695    case URX_LD_SP:
696    case URX_BACKREF:
697    case URX_STO_INP_LOC:
698    case URX_JMPX:
699    case URX_LA_START:
700    case URX_LA_END:
701    case URX_BACKREF_I:
702    case URX_LB_START:
703    case URX_LB_CONT:
704    case URX_LB_END:
705    case URX_LBN_CONT:
706    case URX_LBN_END:
707    case URX_LOOP_C:
708    case URX_LOOP_DOT_I:
709        // types with an integer operand field.
710        REGEX_DUMP_DEBUG_PRINTF(("%d", val));
711        break;
712
713    case URX_ONECHAR:
714    case URX_ONECHAR_I:
715        REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
716        break;
717
718    case URX_STRING:
719    case URX_STRING_I:
720        {
721            int32_t lengthOp       = fCompiledPat->elementAti(index+1);
722            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
723            int32_t length = URX_VAL(lengthOp);
724            int32_t i;
725            for (i=val; i<val+length; i++) {
726                UChar c = fLiteralText[i];
727                if (c < 32 || c >= 256) {c = '.';}
728                REGEX_DUMP_DEBUG_PRINTF(("%c", c));
729            }
730        }
731        break;
732
733    case URX_SETREF:
734    case URX_LOOP_SR_I:
735        {
736            UnicodeString s;
737            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
738            set->toPattern(s, TRUE);
739            for (int32_t i=0; i<s.length(); i++) {
740                REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
741            }
742        }
743        break;
744
745    case URX_STATIC_SETREF:
746    case URX_STAT_SETREF_N:
747        {
748            UnicodeString s;
749            if (val & URX_NEG_SET) {
750                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
751                val &= ~URX_NEG_SET;
752            }
753            UnicodeSet *set = fStaticSets[val];
754            set->toPattern(s, TRUE);
755            for (int32_t i=0; i<s.length(); i++) {
756                REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
757            }
758        }
759        break;
760
761
762    default:
763        REGEX_DUMP_DEBUG_PRINTF(("??????"));
764        break;
765    }
766    REGEX_DUMP_DEBUG_PRINTF(("\n"));
767}
768#endif
769
770
771#if defined(REGEX_DEBUG)
772U_CAPI void  U_EXPORT2
773RegexPatternDump(const RegexPattern *This) {
774    int      index;
775    int      i;
776
777    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
778    UChar32 c = utext_next32From(This->fPattern, 0);
779    while (c != U_SENTINEL) {
780        if (c<32 || c>256) {
781            c = '.';
782        }
783        REGEX_DUMP_DEBUG_PRINTF(("%c", c));
784
785        c = UTEXT_NEXT32(This->fPattern);
786    }
787    REGEX_DUMP_DEBUG_PRINTF(("\n"));
788    REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
789    REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
790    if (This->fStartType == START_STRING) {
791        REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
792        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
793            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
794        }
795        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
796
797    } else if (This->fStartType == START_SET) {
798        int32_t numSetChars = This->fInitialChars->size();
799        if (numSetChars > 20) {
800            numSetChars = 20;
801        }
802        REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
803        for (i=0; i<numSetChars; i++) {
804            UChar32 c = This->fInitialChars->charAt(i);
805            if (0x20<c && c <0x7e) {
806                REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
807            } else {
808                REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
809            }
810        }
811        if (numSetChars < This->fInitialChars->size()) {
812            REGEX_DUMP_DEBUG_PRINTF((" ..."));
813        }
814        REGEX_DUMP_DEBUG_PRINTF(("\n"));
815
816    } else if (This->fStartType == START_CHAR) {
817        REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
818        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
819                REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
820            } else {
821                REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
822            }
823    }
824
825    REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
826           "-------------------------------------------\n"));
827    for (index = 0; index<This->fCompiledPat->size(); index++) {
828        This->dumpOp(index);
829    }
830    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
831}
832#endif
833
834
835
// Macro invocation for RegexPattern; presumably expands to the class-ID (RTTI)
// member definitions declared in the class - confirm against uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
837
838U_NAMESPACE_END
839#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
840