RegexParser.h revision 5f1ab04193ad0130ca8204aadaceae083aca9881
1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef RegexParser_h
27#define RegexParser_h
28
29#include <wtf/Platform.h>
30
31#if ENABLE(YARR)
32
33#include <UString.h>
34#include <wtf/ASCIICType.h>
35#include <wtf/unicode/Unicode.h>
36#include <limits.h>
37
38namespace JSC { namespace Yarr {
39
// Identifiers for the built-in character classes reported to a delegate.
// In this file the parser passes DigitClassID, SpaceClassID and WordClassID
// for the \d, \s and \w escapes (with invert set for \D, \S and \W), and
// passes NewlineClassID, inverted, for '.'.
//
// No trailing comma after the last enumerator: that is only well-formed
// from C++11 onwards.
enum BuiltInCharacterClassID {
    DigitClassID,
    SpaceClassID,
    WordClassID,
    NewlineClassID
};
46
47// The Parser class should not be used directly - only via the Yarr::parse() method.
48template<class Delegate>
49class Parser {
50private:
51    template<class FriendDelegate>
52    friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
53
54    enum ErrorCode {
55        NoError,
56        PatternTooLarge,
57        QuantifierOutOfOrder,
58        QuantifierWithoutAtom,
59        MissingParentheses,
60        ParenthesesUnmatched,
61        ParenthesesTypeInvalid,
62        CharacterClassUnmatched,
63        CharacterClassOutOfOrder,
64        EscapeUnterminated,
65        NumberOfErrorCodes
66    };
67
68    /*
69     * CharacterClassParserDelegate:
70     *
71     * The class CharacterClassParserDelegate is used in the parsing of character
72     * classes.  This class handles detection of character ranges.  This class
73     * implements enough of the delegate interface such that it can be passed to
74     * parseEscape() as an EscapeDelegate.  This allows parseEscape() to be reused
75     * to perform the parsing of escape characters in character sets.
76     */
77    class CharacterClassParserDelegate {
78    public:
79        CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
80            : m_delegate(delegate)
81            , m_err(err)
82            , m_state(empty)
83        {
84        }
85
86        /*
87         * begin():
88         *
89         * Called at beginning of construction.
90         */
91        void begin(bool invert)
92        {
93            m_delegate.atomCharacterClassBegin(invert);
94        }
95
96        /*
97         * atomPatternCharacterUnescaped():
98         *
99         * This method is called directly from parseCharacterClass(), to report a new
100         * pattern character token.  This method differs from atomPatternCharacter(),
101         * which will be called from parseEscape(), since a hypen provided via this
102         * method may be indicating a character range, but a hyphen parsed by
103         * parseEscape() cannot be interpreted as doing so.
104         */
105        void atomPatternCharacterUnescaped(UChar ch)
106        {
107            switch (m_state) {
108            case empty:
109                m_character = ch;
110                m_state = cachedCharacter;
111                break;
112
113            case cachedCharacter:
114                if (ch == '-')
115                    m_state = cachedCharacterHyphen;
116                else {
117                    m_delegate.atomCharacterClassAtom(m_character);
118                    m_character = ch;
119                }
120                break;
121
122            case cachedCharacterHyphen:
123                if (ch >= m_character)
124                    m_delegate.atomCharacterClassRange(m_character, ch);
125                else
126                    m_err = CharacterClassOutOfOrder;
127                m_state = empty;
128            }
129        }
130
131        /*
132         * atomPatternCharacter():
133         *
134         * Adds a pattern character, called by parseEscape(), as such will not
135         * interpret a hyphen as indicating a character range.
136         */
137        void atomPatternCharacter(UChar ch)
138        {
139            // Flush if a character is already pending to prevent the
140            // hyphen from begin interpreted as indicating a range.
141            if((ch == '-') && (m_state == cachedCharacter))
142                flush();
143
144            atomPatternCharacterUnescaped(ch);
145        }
146
147        /*
148         * atomBuiltInCharacterClass():
149         *
150         * Adds a built-in character class, called by parseEscape().
151         */
152        void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
153        {
154            flush();
155            m_delegate.atomCharacterClassBuiltIn(classID, invert);
156        }
157
158        /*
159         * end():
160         *
161         * Called at end of construction.
162         */
163        void end()
164        {
165            flush();
166            m_delegate.atomCharacterClassEnd();
167        }
168
169        // parseEscape() should never call these delegate methods when
170        // invoked with inCharacterClass set.
171        void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
172        void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
173
174    private:
175        void flush()
176        {
177            if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
178                m_delegate.atomCharacterClassAtom(m_character);
179            if (m_state == cachedCharacterHyphen)
180                m_delegate.atomCharacterClassAtom('-');
181            m_state = empty;
182        }
183
184        Delegate& m_delegate;
185        ErrorCode& m_err;
186        enum CharacterClassConstructionState {
187            empty,
188            cachedCharacter,
189            cachedCharacterHyphen,
190        } m_state;
191        UChar m_character;
192    };
193
    // Constructs a parser that reads 'pattern' and reports tokens to 'delegate'.
    // backReferenceLimit is the largest number that parseEscape() will report
    // as a back reference.
    //
    // The parser keeps pattern.data() and pattern.size() rather than the string
    // object itself; presumably 'pattern' must therefore outlive the parser
    // (Yarr::parse() only ever uses the parser as a temporary).
    Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
        : m_delegate(delegate)
        , m_backReferenceLimit(backReferenceLimit)
        , m_err(NoError)
        , m_data(pattern.data())
        , m_size(pattern.size())
        , m_index(0)
        , m_parenthesesNestingDepth(0)
    {
    }
204
205    /*
206     * parseEscape():
207     *
208     * Helper for parseTokens() AND parseCharacterClass().
209     * Unlike the other parser methods, this function does not report tokens
210     * directly to the member delegate (m_delegate), instead tokens are
211     * emitted to the delegate provided as an argument.  In the case of atom
212     * escapes, parseTokens() will call parseEscape() passing m_delegate as
213     * an argument, and as such the escape will be reported to the delegate.
214     *
215     * However this method may also be used by parseCharacterClass(), in which
216     * case a CharacterClassParserDelegate will be passed as the delegate that
217     * tokens should be added to.  A boolean flag is also provided to indicate
218     * whether that an escape in a CharacterClass is being parsed (some parsing
219     * rules change in this context).
220     *
221     * The boolean value returned by this method indicates whether the token
222     * parsed was an atom (outside of a characted class \b and \B will be
223     * interpreted as assertions).
224     */
225    template<bool inCharacterClass, class EscapeDelegate>
226    bool parseEscape(EscapeDelegate& delegate)
227    {
228        ASSERT(!m_err);
229        ASSERT(peek() == '\\');
230        consume();
231
232        if (atEndOfPattern()) {
233            m_err = EscapeUnterminated;
234            return false;
235        }
236
237        switch (peek()) {
238        // Assertions
239        case 'b':
240            consume();
241            if (inCharacterClass)
242                delegate.atomPatternCharacter('\b');
243            else {
244                delegate.assertionWordBoundary(false);
245                return false;
246            }
247            break;
248        case 'B':
249            consume();
250            if (inCharacterClass)
251                delegate.atomPatternCharacter('B');
252            else {
253                delegate.assertionWordBoundary(true);
254                return false;
255            }
256            break;
257
258        // CharacterClassEscape
259        case 'd':
260            consume();
261            delegate.atomBuiltInCharacterClass(DigitClassID, false);
262            break;
263        case 's':
264            consume();
265            delegate.atomBuiltInCharacterClass(SpaceClassID, false);
266            break;
267        case 'w':
268            consume();
269            delegate.atomBuiltInCharacterClass(WordClassID, false);
270            break;
271        case 'D':
272            consume();
273            delegate.atomBuiltInCharacterClass(DigitClassID, true);
274            break;
275        case 'S':
276            consume();
277            delegate.atomBuiltInCharacterClass(SpaceClassID, true);
278            break;
279        case 'W':
280            consume();
281            delegate.atomBuiltInCharacterClass(WordClassID, true);
282            break;
283
284        // DecimalEscape
285        case '1':
286        case '2':
287        case '3':
288        case '4':
289        case '5':
290        case '6':
291        case '7':
292        case '8':
293        case '9': {
294            // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
295            // First, try to parse this as backreference.
296            if (!inCharacterClass) {
297                ParseState state = saveState();
298
299                unsigned backReference = consumeNumber();
300                if (backReference <= m_backReferenceLimit) {
301                    delegate.atomBackReference(backReference);
302                    break;
303                }
304
305                restoreState(state);
306            }
307
308            // Not a backreference, and not octal.
309            if (peek() >= '8') {
310                delegate.atomPatternCharacter('\\');
311                break;
312            }
313
314            // Fall-through to handle this as an octal escape.
315        }
316
317        // Octal escape
318        case '0':
319            delegate.atomPatternCharacter(consumeOctal());
320            break;
321
322        // ControlEscape
323        case 'f':
324            consume();
325            delegate.atomPatternCharacter('\f');
326            break;
327        case 'n':
328            consume();
329            delegate.atomPatternCharacter('\n');
330            break;
331        case 'r':
332            consume();
333            delegate.atomPatternCharacter('\r');
334            break;
335        case 't':
336            consume();
337            delegate.atomPatternCharacter('\t');
338            break;
339        case 'v':
340            consume();
341            delegate.atomPatternCharacter('\v');
342            break;
343
344        // ControlLetter
345        case 'c': {
346            ParseState state = saveState();
347            consume();
348            if (!atEndOfPattern()) {
349                int control = consume();
350
351                // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
352                if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
353                    delegate.atomPatternCharacter(control & 0x1f);
354                    break;
355                }
356            }
357            restoreState(state);
358            delegate.atomPatternCharacter('\\');
359            break;
360        }
361
362        // HexEscape
363        case 'x': {
364            consume();
365            int x = tryConsumeHex(2);
366            if (x == -1)
367                delegate.atomPatternCharacter('x');
368            else
369                delegate.atomPatternCharacter(x);
370            break;
371        }
372
373        // UnicodeEscape
374        case 'u': {
375            consume();
376            int u = tryConsumeHex(4);
377            if (u == -1)
378                delegate.atomPatternCharacter('u');
379            else
380                delegate.atomPatternCharacter(u);
381            break;
382        }
383
384        // IdentityEscape
385        default:
386            delegate.atomPatternCharacter(consume());
387        }
388
389        return true;
390    }
391
392    /*
393     * parseAtomEscape(), parseCharacterClassEscape():
394     *
395     * These methods alias to parseEscape().
396     */
397    bool parseAtomEscape()
398    {
399        return parseEscape<false>(m_delegate);
400    }
401    void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
402    {
403        parseEscape<true>(delegate);
404    }
405
406    /*
407     * parseCharacterClass():
408     *
409     * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
410     * to an instance of CharacterClassParserDelegate, to describe the character class to the
411     * delegate.
412     */
413    void parseCharacterClass()
414    {
415        ASSERT(!m_err);
416        ASSERT(peek() == '[');
417        consume();
418
419        CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
420
421        characterClassConstructor.begin(tryConsume('^'));
422
423        while (!atEndOfPattern()) {
424            switch (peek()) {
425            case ']':
426                consume();
427                characterClassConstructor.end();
428                return;
429
430            case '\\':
431                parseCharacterClassEscape(characterClassConstructor);
432                break;
433
434            default:
435                characterClassConstructor.atomPatternCharacterUnescaped(consume());
436            }
437
438            if (m_err)
439                return;
440        }
441
442        m_err = CharacterClassUnmatched;
443    }
444
445    /*
446     * parseParenthesesBegin():
447     *
448     * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
449     */
450    void parseParenthesesBegin()
451    {
452        ASSERT(!m_err);
453        ASSERT(peek() == '(');
454        consume();
455
456        if (tryConsume('?')) {
457            if (atEndOfPattern()) {
458                m_err = ParenthesesTypeInvalid;
459                return;
460            }
461
462            switch (consume()) {
463            case ':':
464                m_delegate.atomParenthesesSubpatternBegin(false);
465                break;
466
467            case '=':
468                m_delegate.atomParentheticalAssertionBegin();
469                break;
470
471            case '!':
472                m_delegate.atomParentheticalAssertionBegin(true);
473                break;
474
475            default:
476                m_err = ParenthesesTypeInvalid;
477            }
478        } else
479            m_delegate.atomParenthesesSubpatternBegin();
480
481        ++m_parenthesesNestingDepth;
482    }
483
484    /*
485     * parseParenthesesEnd():
486     *
487     * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
488     */
489    void parseParenthesesEnd()
490    {
491        ASSERT(!m_err);
492        ASSERT(peek() == ')');
493        consume();
494
495        if (m_parenthesesNestingDepth > 0)
496            m_delegate.atomParenthesesEnd();
497        else
498            m_err = ParenthesesUnmatched;
499
500        --m_parenthesesNestingDepth;
501    }
502
503    /*
504     * parseQuantifier():
505     *
506     * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
507     */
508    void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
509    {
510        ASSERT(!m_err);
511        ASSERT(min <= max);
512
513        if (lastTokenWasAnAtom)
514            m_delegate.quantifyAtom(min, max, !tryConsume('?'));
515        else
516            m_err = QuantifierWithoutAtom;
517    }
518
519    /*
520     * parseTokens():
521     *
522     * This method loops over the input pattern reporting tokens to the delegate.
523     * The method returns when a parse error is detected, or the end of the pattern
524     * is reached.  One piece of state is tracked around the loop, which is whether
525     * the last token passed to the delegate was an atom (this is necessary to detect
526     * a parse error when a quantifier provided without an atom to quantify).
527     */
528    void parseTokens()
529    {
530        bool lastTokenWasAnAtom = false;
531
532        while (!atEndOfPattern()) {
533            switch (peek()) {
534            case '|':
535                consume();
536                m_delegate.disjunction();
537                lastTokenWasAnAtom = false;
538                break;
539
540            case '(':
541                parseParenthesesBegin();
542                lastTokenWasAnAtom = false;
543                break;
544
545            case ')':
546                parseParenthesesEnd();
547                lastTokenWasAnAtom = true;
548                break;
549
550            case '^':
551                consume();
552                m_delegate.assertionBOL();
553                lastTokenWasAnAtom = false;
554                break;
555
556            case '$':
557                consume();
558                m_delegate.assertionEOL();
559                lastTokenWasAnAtom = false;
560                break;
561
562            case '.':
563                consume();
564                m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
565                lastTokenWasAnAtom = true;
566                break;
567
568            case '[':
569                parseCharacterClass();
570                lastTokenWasAnAtom = true;
571                break;
572
573            case '\\':
574                lastTokenWasAnAtom = parseAtomEscape();
575                break;
576
577            case '*':
578                consume();
579                parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX);
580                lastTokenWasAnAtom = false;
581                break;
582
583            case '+':
584                consume();
585                parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX);
586                lastTokenWasAnAtom = false;
587                break;
588
589            case '?':
590                consume();
591                parseQuantifier(lastTokenWasAnAtom, 0, 1);
592                lastTokenWasAnAtom = false;
593                break;
594
595            case '{': {
596                ParseState state = saveState();
597
598                consume();
599                if (peekIsDigit()) {
600                    unsigned min = consumeNumber();
601                    unsigned max = min;
602
603                    if (tryConsume(','))
604                        max = peekIsDigit() ? consumeNumber() : UINT_MAX;
605
606                    if (tryConsume('}')) {
607                        if (min <= max)
608                            parseQuantifier(lastTokenWasAnAtom, min, max);
609                        else
610                            m_err = QuantifierOutOfOrder;
611                        lastTokenWasAnAtom = false;
612                        break;
613                    }
614                }
615
616                restoreState(state);
617            } // if we did not find a complete quantifer, fall through to the default case.
618
619            default:
620                m_delegate.atomPatternCharacter(consume());
621                lastTokenWasAnAtom = true;
622            }
623
624            if (m_err)
625                return;
626        }
627
628        if (m_parenthesesNestingDepth > 0)
629            m_err = MissingParentheses;
630    }
631
632    /*
633     * parse():
634     *
635     * This method calls regexBegin(), calls parseTokens() to parse over the input
636     * patterns, calls regexEnd() or regexError() as appropriate, and converts any
637     * error code to a const char* for a result.
638     */
639    const char* parse()
640    {
641        m_delegate.regexBegin();
642
643        if (m_size > MAX_PATTERN_SIZE)
644            m_err = PatternTooLarge;
645        else
646            parseTokens();
647        ASSERT(atEndOfPattern() || m_err);
648
649        if (m_err)
650            m_delegate.regexError();
651        else
652            m_delegate.regexEnd();
653
654        // The order of this array must match the ErrorCode enum.
655        static const char* errorMessages[NumberOfErrorCodes] = {
656            0, // NoError
657            "regular expression too large",
658            "numbers out of order in {} quantifier",
659            "nothing to repeat",
660            "missing )",
661            "unmatched parentheses",
662            "unrecognized character after (?",
663            "missing terminating ] for character class",
664            "range out of order in character class",
665            "\\ at end of pattern"
666        };
667
668        return errorMessages[m_err];
669    }
670
671
672    // Misc helper functions:
673
674    typedef unsigned ParseState;
675
676    ParseState saveState()
677    {
678        return m_index;
679    }
680
681    void restoreState(ParseState state)
682    {
683        m_index = state;
684    }
685
686    bool atEndOfPattern()
687    {
688        ASSERT(m_index <= m_size);
689        return m_index == m_size;
690    }
691
692    int peek()
693    {
694        ASSERT(m_index < m_size);
695        return m_data[m_index];
696    }
697
698    bool peekIsDigit()
699    {
700        return !atEndOfPattern() && WTF::isASCIIDigit(peek());
701    }
702
703    unsigned peekDigit()
704    {
705        ASSERT(peekIsDigit());
706        return peek() - '0';
707    }
708
709    int consume()
710    {
711        ASSERT(m_index < m_size);
712        return m_data[m_index++];
713    }
714
715    unsigned consumeDigit()
716    {
717        ASSERT(peekIsDigit());
718        return consume() - '0';
719    }
720
721    unsigned consumeNumber()
722    {
723        unsigned n = consumeDigit();
724        // check for overflow.
725        for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
726            n = newValue;
727            consume();
728        }
729        return n;
730    }
731
732    unsigned consumeOctal()
733    {
734        ASSERT(WTF::isASCIIOctalDigit(peek()));
735
736        unsigned n = consumeDigit();
737        while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
738            n = n * 8 + consumeDigit();
739        return n;
740    }
741
742    bool tryConsume(UChar ch)
743    {
744        if (atEndOfPattern() || (m_data[m_index] != ch))
745            return false;
746        ++m_index;
747        return true;
748    }
749
750    int tryConsumeHex(int count)
751    {
752        ParseState state = saveState();
753
754        int n = 0;
755        while (count--) {
756            if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
757                restoreState(state);
758                return -1;
759            }
760            n = (n << 4) | WTF::toASCIIHexValue(consume());
761        }
762        return n;
763    }
764
    // Receiver of the token callbacks.
    Delegate& m_delegate;
    // Largest number that parseEscape() will report as a back reference.
    unsigned m_backReferenceLimit;
    // Error recorded by the parser; NoError while parsing is succeeding.
    ErrorCode m_err;
    // The pattern's characters (pattern.data()) and length (pattern.size()).
    const UChar* m_data;
    unsigned m_size;
    // Index into m_data of the next character to be read.
    unsigned m_index;
    // Number of parentheses opened by parseParenthesesBegin() and not yet closed.
    unsigned m_parenthesesNestingDepth;

    // Derived by empirical testing of compile time in PCRE and WREC.
    // parse() rejects patterns longer than this with PatternTooLarge.
    static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
775};
776
777/*
778 * Yarr::parse():
779 *
780 * The parse method is passed a pattern to be parsed and a delegate upon which
781 * callbacks will be made to record the parsed tokens forming the regex.
782 * Yarr::parse() returns null on success, or a const C string providing an error
783 * message where a parse error occurs.
784 *
785 * The Delegate must implement the following interface:
786 *
787 *    void assertionBOL();
788 *    void assertionEOL();
789 *    void assertionWordBoundary(bool invert);
790 *
791 *    void atomPatternCharacter(UChar ch);
792 *    void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
 *    void atomCharacterClassBegin(bool invert);
 *    void atomCharacterClassAtom(UChar ch);
 *    void atomCharacterClassRange(UChar begin, UChar end);
 *    void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert);
797 *    void atomCharacterClassEnd()
798 *    void atomParenthesesSubpatternBegin(bool capture = true);
799 *    void atomParentheticalAssertionBegin(bool invert = false);
800 *    void atomParenthesesEnd();
801 *    void atomBackReference(unsigned subpatternId);
802 *
803 *    void quantifyAtom(unsigned min, unsigned max, bool greedy);
804 *
805 *    void disjunction();
806 *
807 *    void regexBegin();
808 *    void regexEnd();
809 *    void regexError();
810 *
811 * Before any call recording tokens are made, regexBegin() will be called on the
812 * delegate once.  Once parsing is complete either regexEnd() or regexError() will
813 * be called, as appropriate.
814 *
815 * The regular expression is described by a sequence of assertion*() and atom*()
816 * callbacks to the delegate, describing the terms in the regular expression.
817 * Following an atom a quantifyAtom() call may occur to indicate that the previous
818 * atom should be quantified.  In the case of atoms described across multiple
819 * calls (parentheses and character classes) the call to quantifyAtom() will come
820 * after the call to the atom*End() method, never after atom*Begin().
821 *
822 * Character classes may either be described by a single call to
823 * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
824 * In the latter case, ...Begin() will be called, followed by a sequence of
825 * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
826 *
827 * Sequences of atoms and assertions are broken into alternatives via calls to
828 * disjunction().  Assertions, atoms, and disjunctions emitted between calls to
829 * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
830 * atomParenthesesBegin() is passed a subpatternId.  In the case of a regular
831 * capturing subpattern, this will be the subpatternId associated with these
832 * parentheses, and will also by definition be the lowest subpatternId of these
833 * parentheses and of any nested paretheses.  The atomParenthesesEnd() method
834 * is passed the subpatternId of the last capturing subexpression nested within
835 * these paretheses.  In the case of a capturing subpattern with no nested
836 * capturing subpatterns, the same subpatternId will be passed to the begin and
837 * end functions.  In the case of non-capturing subpatterns the subpatternId
838 * passed to the begin method is also the first possible subpatternId that might
839 * be nested within these paretheses.  If a set of non-capturing parentheses does
840 * not contain any capturing subpatterns, then the subpatternId passed to begin
841 * will be greater than the subpatternId passed to end.
842 */
843
844template<class Delegate>
845const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
846{
847    return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
848}
849
850} } // namespace JSC::Yarr
851
852#endif
853
854#endif // RegexParser_h
855