15f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian/*
25f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Copyright (C) 2009 Apple Inc. All rights reserved.
35f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
45f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Redistribution and use in source and binary forms, with or without
55f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * modification, are permitted provided that the following conditions
65f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * are met:
75f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 1. Redistributions of source code must retain the above copyright
85f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    notice, this list of conditions and the following disclaimer.
95f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * 2. Redistributions in binary form must reproduce the above copyright
105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    notice, this list of conditions and the following disclaimer in the
115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    documentation and/or other materials provided with the distribution.
125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */
255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#ifndef RegexParser_h
275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#define RegexParser_h
285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <wtf/Platform.h>
305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#if ENABLE(YARR)
325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <UString.h>
345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <wtf/ASCIICType.h>
355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <wtf/unicode/Unicode.h>
365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#include <limits.h>
375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qiannamespace JSC { namespace Yarr {
395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
// Identifies one of the predefined character classes.  parseEscape() reports
// these to its delegate via atomBuiltInCharacterClass(), together with an
// 'invert' flag for the upper-case forms of the escapes.
enum BuiltInCharacterClassID {
    // Reported for the \d and \D escapes.
    DigitClassID,
    // Reported for the \s and \S escapes.
    SpaceClassID,
    // Reported for the \w and \W escapes.
    WordClassID,
    // NOTE(review): not produced by any escape in parseEscape(); presumably
    // used elsewhere for line terminators - confirm against the callers.
    NewlineClassID
};
465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian// The Parser class should not be used directly - only via the Yarr::parse() method.
485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qiantemplate<class Delegate>
495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qianclass Parser {
505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qianprivate:
515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    template<class FriendDelegate>
525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
// Parse failure codes.  NoError must remain the first enumerator (value 0):
// the parser tests for "no error so far" with expressions such as !m_err.
enum ErrorCode {
    NoError,

    // The codes below are raised by parts of the parser that report
    // problems with pattern size, quantifiers and parentheses.
    PatternTooLarge,
    QuantifierOutOfOrder,
    QuantifierWithoutAtom,
    MissingParentheses,
    ParenthesesUnmatched,
    ParenthesesTypeInvalid,

    CharacterClassUnmatched,
    // Set by CharacterClassParserDelegate when the end of a range compares
    // lower than its start.
    CharacterClassOutOfOrder,
    // Set by parseEscape() when the pattern ends directly after a backslash.
    EscapeUnterminated,

    // Not an error: by position, equals the number of codes listed above.
    NumberOfErrorCodes
};
675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * CharacterClassParserDelegate:
705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * The class CharacterClassParserDelegate is used in the parsing of character
725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * classes.  This class handles detection of character ranges.  This class
735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * implements enough of the delegate interface such that it can be passed to
745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parseEscape() as an EscapeDelegate.  This allows parseEscape() to be reused
755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * to perform the parsing of escape characters in character sets.
765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    class CharacterClassParserDelegate {
785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    public:
795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            : m_delegate(delegate)
815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            , m_err(err)
825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            , m_state(empty)
835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        {
845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        /*
875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * begin():
885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         *
895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * Called at beginning of construction.
905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         */
915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        void begin(bool invert)
925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        {
935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_delegate.atomCharacterClassBegin(invert);
945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        /*
975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * atomPatternCharacterUnescaped():
985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         *
995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * This method is called directly from parseCharacterClass(), to report a new
1005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * pattern character token.  This method differs from atomPatternCharacter(),
1015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * which will be called from parseEscape(), since a hypen provided via this
1025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * method may be indicating a character range, but a hyphen parsed by
1035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * parseEscape() cannot be interpreted as doing so.
1045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         */
1055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        void atomPatternCharacterUnescaped(UChar ch)
1065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        {
1075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            switch (m_state) {
1085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case empty:
1095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_character = ch;
1105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_state = cachedCharacter;
1115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
1125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case cachedCharacter:
1145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                if (ch == '-')
1155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    m_state = cachedCharacterHyphen;
1165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                else {
1175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    m_delegate.atomCharacterClassAtom(m_character);
1185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    m_character = ch;
1195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                }
1205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
1215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case cachedCharacterHyphen:
1235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                if (ch >= m_character)
1245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    m_delegate.atomCharacterClassRange(m_character, ch);
1255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                else
1265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    m_err = CharacterClassOutOfOrder;
1275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_state = empty;
1285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
1295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
1305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        /*
1325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * atomPatternCharacter():
1335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         *
1345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * Adds a pattern character, called by parseEscape(), as such will not
1355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * interpret a hyphen as indicating a character range.
1365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         */
1375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        void atomPatternCharacter(UChar ch)
1385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        {
1395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            // Flush if a character is already pending to prevent the
1405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            // hyphen from begin interpreted as indicating a range.
1415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if((ch == '-') && (m_state == cachedCharacter))
1425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                flush();
1435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            atomPatternCharacterUnescaped(ch);
1455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
1465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        /*
1485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * atomBuiltInCharacterClass():
1495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         *
1505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * Adds a built-in character class, called by parseEscape().
1515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         */
1525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
1535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        {
1545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            flush();
1555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_delegate.atomCharacterClassBuiltIn(classID, invert);
1565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
1575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        /*
1595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * end():
1605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         *
1615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         * Called at end of construction.
1625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian         */
1635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        void end()
1645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        {
1655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            flush();
1665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_delegate.atomCharacterClassEnd();
1675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
1685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // parseEscape() should never call these delegate methods when
1705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // invoked with inCharacterClass set.
1715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
1725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
1735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    private:
1755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        void flush()
1765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        {
1775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
1785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.atomCharacterClassAtom(m_character);
1795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (m_state == cachedCharacterHyphen)
1805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.atomCharacterClassAtom('-');
1815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_state = empty;
1825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
1835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        Delegate& m_delegate;
1855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ErrorCode& m_err;
1865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        enum CharacterClassConstructionState {
1875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            empty,
1885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            cachedCharacter,
1895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            cachedCharacterHyphen,
1905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        } m_state;
1915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        UChar m_character;
1925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    };
1935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
1945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
1955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        : m_delegate(delegate)
1965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        , m_backReferenceLimit(backReferenceLimit)
1975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        , m_err(NoError)
1985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        , m_data(pattern.data())
1995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        , m_size(pattern.size())
2005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        , m_index(0)
2015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        , m_parenthesesNestingDepth(0)
2025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
2035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
2045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
2055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
2065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parseEscape():
2075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
2085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * Helper for parseTokens() AND parseCharacterClass().
2095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * Unlike the other parser methods, this function does not report tokens
2105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * directly to the member delegate (m_delegate), instead tokens are
2115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * emitted to the delegate provided as an argument.  In the case of atom
2125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * escapes, parseTokens() will call parseEscape() passing m_delegate as
2135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * an argument, and as such the escape will be reported to the delegate.
2145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
2155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * However this method may also be used by parseCharacterClass(), in which
2165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * case a CharacterClassParserDelegate will be passed as the delegate that
     * tokens should be added to.  A boolean flag is also provided to indicate
     * whether an escape in a CharacterClass is being parsed (some parsing
     * rules change in this context).
2205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
     * The boolean value returned by this method indicates whether the token
     * parsed was an atom (outside of a character class \b and \B will be
     * interpreted as assertions).
2245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
2255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    template<bool inCharacterClass, class EscapeDelegate>
2265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    bool parseEscape(EscapeDelegate& delegate)
2275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
2285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(!m_err);
2295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(peek() == '\\');
2305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        consume();
2315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
2325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        if (atEndOfPattern()) {
2335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_err = EscapeUnterminated;
2345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            return false;
2355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
2365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
2375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        switch (peek()) {
2385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // Assertions
2395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'b':
2405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
2415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (inCharacterClass)
2425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.atomPatternCharacter('\b');
2435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            else {
2445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.assertionWordBoundary(false);
2455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                return false;
2465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
2475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
2485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'B':
2495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
2505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (inCharacterClass)
2515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.atomPatternCharacter('B');
2525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            else {
2535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.assertionWordBoundary(true);
2545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                return false;
2555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
2565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
2575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
2585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // CharacterClassEscape
2595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'd':
2605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
2615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomBuiltInCharacterClass(DigitClassID, false);
2625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
2635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 's':
2645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
2655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomBuiltInCharacterClass(SpaceClassID, false);
2665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
2675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'w':
2685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
2695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomBuiltInCharacterClass(WordClassID, false);
2705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
2715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'D':
2725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
2735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomBuiltInCharacterClass(DigitClassID, true);
2745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
2755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'S':
2765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
2775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomBuiltInCharacterClass(SpaceClassID, true);
2785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
2795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'W':
2805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
2815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomBuiltInCharacterClass(WordClassID, true);
2825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
2835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
2845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // DecimalEscape
2855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '1':
2865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '2':
2875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '3':
2885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '4':
2895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '5':
2905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '6':
2915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '7':
2925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '8':
2935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '9': {
2945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
2955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            // First, try to parse this as backreference.
2965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (!inCharacterClass) {
2975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                ParseState state = saveState();
2985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
2995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                unsigned backReference = consumeNumber();
3005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                if (backReference <= m_backReferenceLimit) {
3015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    delegate.atomBackReference(backReference);
3025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    break;
3035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                }
3045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                restoreState(state);
3065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
3075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            // Not a backreference, and not octal.
3095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (peek() >= '8') {
3105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.atomPatternCharacter('\\');
3115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
3125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
3135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            // Fall-through to handle this as an octal escape.
3155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
3165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // Octal escape
3185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case '0':
3195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomPatternCharacter(consumeOctal());
3205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // ControlEscape
3235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'f':
3245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
3255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomPatternCharacter('\f');
3265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'n':
3285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
3295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomPatternCharacter('\n');
3305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'r':
3325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
3335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomPatternCharacter('\r');
3345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 't':
3365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
3375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomPatternCharacter('\t');
3385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'v':
3405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
3415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomPatternCharacter('\v');
3425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // ControlLetter
3455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'c': {
3465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            ParseState state = saveState();
3475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
3485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (!atEndOfPattern()) {
3495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                int control = consume();
3505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
3525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
3535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    delegate.atomPatternCharacter(control & 0x1f);
3545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    break;
3555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                }
3565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
3575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            restoreState(state);
3585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomPatternCharacter('\\');
3595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
3615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // HexEscape
3635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'x': {
3645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
3655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            int x = tryConsumeHex(2);
3665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (x == -1)
3675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.atomPatternCharacter('x');
3685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            else
3695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.atomPatternCharacter(x);
3705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
3725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // UnicodeEscape
3745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        case 'u': {
3755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
3765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            int u = tryConsumeHex(4);
3775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (u == -1)
3785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.atomPatternCharacter('u');
3795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            else
3805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                delegate.atomPatternCharacter(u);
3815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            break;
3825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
3835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // IdentityEscape
3855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        default:
3865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            delegate.atomPatternCharacter(consume());
3875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
3885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return true;
3905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
3915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
3925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
3935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parseAtomEscape(), parseCharacterClassEscape():
3945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
3955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * These methods alias to parseEscape().
3965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
3975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    bool parseAtomEscape()
3985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
3995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return parseEscape<false>(m_delegate);
4005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
4015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
4025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
4035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        parseEscape<true>(delegate);
4045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
4055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
4075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parseCharacterClass():
4085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
4095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * Helper for parseTokens(); calls directly and indirectly (via parseCharacterClassEscape)
4105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * to an instance of CharacterClassParserDelegate, to describe the character class to the
4115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * delegate.
4125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
4135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    void parseCharacterClass()
4145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
4155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(!m_err);
4165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(peek() == '[');
4175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        consume();
4185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
4205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        characterClassConstructor.begin(tryConsume('^'));
4225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        while (!atEndOfPattern()) {
4245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            switch (peek()) {
4255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case ']':
4265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
4275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                characterClassConstructor.end();
4285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                return;
4295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '\\':
4315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                parseCharacterClassEscape(characterClassConstructor);
4325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
4335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            default:
4355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                characterClassConstructor.atomPatternCharacterUnescaped(consume());
4365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
4375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (m_err)
4395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                return;
4405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
4415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        m_err = CharacterClassUnmatched;
4435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
4445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
4465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parseParenthesesBegin():
4475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
4485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
4495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
4505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    void parseParenthesesBegin()
4515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
4525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(!m_err);
4535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(peek() == '(');
4545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        consume();
4555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        if (tryConsume('?')) {
4575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (atEndOfPattern()) {
4585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_err = ParenthesesTypeInvalid;
4595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                return;
4605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
4615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            switch (consume()) {
4635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case ':':
4645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.atomParenthesesSubpatternBegin(false);
4655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
4665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '=':
4685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.atomParentheticalAssertionBegin();
4695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
4705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '!':
4725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.atomParentheticalAssertionBegin(true);
4735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
4745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            default:
4765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_err = ParenthesesTypeInvalid;
4775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
4785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        } else
4795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_delegate.atomParenthesesSubpatternBegin();
4805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ++m_parenthesesNestingDepth;
4825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
4835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
4855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parseParenthesesEnd():
4865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
4875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
4885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
4895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    void parseParenthesesEnd()
4905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
4915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(!m_err);
4925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(peek() == ')');
4935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        consume();
4945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
4955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        if (m_parenthesesNestingDepth > 0)
4965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_delegate.atomParenthesesEnd();
4975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        else
4985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_err = ParenthesesUnmatched;
4995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        --m_parenthesesNestingDepth;
5015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
5025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
5045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parseQuantifier():
5055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
5065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
5075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
5085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
5095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
5105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(!m_err);
5115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(min <= max);
5125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        if (lastTokenWasAnAtom)
5145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_delegate.quantifyAtom(min, max, !tryConsume('?'));
5155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        else
5165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_err = QuantifierWithoutAtom;
5175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
5185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
5205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parseTokens():
5215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
5225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * This method loops over the input pattern reporting tokens to the delegate.
5235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * The method returns when a parse error is detected, or the end of the pattern
5245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * is reached.  One piece of state is tracked around the loop, which is whether
5255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * the last token passed to the delegate was an atom (this is necessary to detect
5265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * a parse error when a quantifier is provided without an atom to quantify).
5275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
5285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    void parseTokens()
5295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
5305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        bool lastTokenWasAnAtom = false;
5315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        while (!atEndOfPattern()) {
5335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            switch (peek()) {
5345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '|':
5355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
5365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.disjunction();
5375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = false;
5385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '(':
5415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                parseParenthesesBegin();
5425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = false;
5435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case ')':
5465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                parseParenthesesEnd();
5475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = true;
5485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '^':
5515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
5525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.assertionBOL();
5535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = false;
5545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '$':
5575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
5585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.assertionEOL();
5595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = false;
5605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '.':
5635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
5645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
5655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = true;
5665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '[':
5695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                parseCharacterClass();
5705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = true;
5715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '\\':
5745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = parseAtomEscape();
5755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '*':
5785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
5795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX);
5805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = false;
5815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '+':
5845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
5855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX);
5865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = false;
5875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '?':
5905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
5915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                parseQuantifier(lastTokenWasAnAtom, 0, 1);
5925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = false;
5935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                break;
5945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            case '{': {
5965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                ParseState state = saveState();
5975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
5985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                consume();
5995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                if (peekIsDigit()) {
6005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    unsigned min = consumeNumber();
6015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    unsigned max = min;
6025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    if (tryConsume(','))
6045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                        max = peekIsDigit() ? consumeNumber() : UINT_MAX;
6055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    if (tryConsume('}')) {
6075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                        if (min <= max)
6085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                            parseQuantifier(lastTokenWasAnAtom, min, max);
6095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                        else
6105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                            m_err = QuantifierOutOfOrder;
6115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                        lastTokenWasAnAtom = false;
6125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                        break;
6135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                    }
6145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                }
6155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                restoreState(state);
6175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            } // if we did not find a complete quantifer, fall through to the default case.
6185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            default:
6205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                m_delegate.atomPatternCharacter(consume());
6215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                lastTokenWasAnAtom = true;
6225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
6235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (m_err)
6255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                return;
6265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
6275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        if (m_parenthesesNestingDepth > 0)
6295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_err = MissingParentheses;
6305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
6315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    /*
6335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * parse():
6345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     *
6355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * This method calls regexBegin(), calls parseTokens() to parse the input
6365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * pattern, calls regexEnd() or regexError() as appropriate, and converts any
6375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     * error code to a const char* for a result.
6385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian     */
6395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    const char* parse()
6405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
6415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        m_delegate.regexBegin();
6425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        if (m_size > MAX_PATTERN_SIZE)
6445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_err = PatternTooLarge;
6455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        else
6465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            parseTokens();
6475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(atEndOfPattern() || m_err);
6485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        if (m_err)
6505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_delegate.regexError();
6515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        else
6525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            m_delegate.regexEnd();
6535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // The order of this array must match the ErrorCode enum.
6555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        static const char* errorMessages[NumberOfErrorCodes] = {
6565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            0, // NoError
6575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "regular expression too large",
6585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "numbers out of order in {} quantifier",
6595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "nothing to repeat",
6605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "missing )",
6615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "unmatched parentheses",
6625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "unrecognized character after (?",
6635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "missing terminating ] for character class",
6645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "range out of order in character class",
6655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            "\\ at end of pattern"
6665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        };
6675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return errorMessages[m_err];
6695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
6705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    // Misc helper functions:
6735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    typedef unsigned ParseState;
6755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    ParseState saveState()
6775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
6785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return m_index;
6795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
6805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    void restoreState(ParseState state)
6825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
6835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        m_index = state;
6845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
6855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    bool atEndOfPattern()
6875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
6885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(m_index <= m_size);
6895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return m_index == m_size;
6905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
6915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    int peek()
6935f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
6945f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(m_index < m_size);
6955f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return m_data[m_index];
6965f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
6975f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
6985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    bool peekIsDigit()
6995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
7005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return !atEndOfPattern() && WTF::isASCIIDigit(peek());
7015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
7025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    unsigned peekDigit()
7045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
7055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(peekIsDigit());
7065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return peek() - '0';
7075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
7085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    int consume()
7105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
7115f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(m_index < m_size);
7125f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return m_data[m_index++];
7135f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
7145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    unsigned consumeDigit()
7165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
7175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(peekIsDigit());
7185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return consume() - '0';
7195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
7205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    unsigned consumeNumber()
7225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
7235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        unsigned n = consumeDigit();
7245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        // check for overflow.
7255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
7265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            n = newValue;
7275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            consume();
7285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
7295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return n;
7305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
7315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    unsigned consumeOctal()
7335f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
7345f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ASSERT(WTF::isASCIIOctalDigit(peek()));
7355f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7365f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        unsigned n = consumeDigit();
7375f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
7385f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            n = n * 8 + consumeDigit();
7395f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return n;
7405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
7415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    bool tryConsume(UChar ch)
7435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
7445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        if (atEndOfPattern() || (m_data[m_index] != ch))
7455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            return false;
7465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ++m_index;
7475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return true;
7485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
7495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    int tryConsumeHex(int count)
7515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    {
7525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        ParseState state = saveState();
7535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        int n = 0;
7555f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        while (count--) {
7565f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
7575f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                restoreState(state);
7585f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian                return -1;
7595f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            }
7605f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian            n = (n << 4) | WTF::toASCIIHexValue(consume());
7615f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        }
7625f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian        return n;
7635f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    }
7645f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7655f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    Delegate& m_delegate; // Receives every callback this parser issues (atoms, assertions, regexBegin/End/Error).
7665f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    unsigned m_backReferenceLimit; // NOTE(review): not referenced in this part of the file; presumably bounds valid back-reference numbers - confirm.
7675f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    ErrorCode m_err; // Current error code; tested as a boolean throughout and used to index errorMessages in parse().
7685f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    const UChar* m_data; // Pattern characters, read through peek()/consume().
7695f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    unsigned m_size; // Number of characters in m_data; atEndOfPattern() is m_index == m_size.
7705f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    unsigned m_index; // Read position within m_data; also the value saved and restored as a ParseState.
7715f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    unsigned m_parenthesesNestingDepth; // Incremented by parseParenthesesBegin(), decremented by parseParenthesesEnd().
7725f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7735f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    // Derived by empirical testing of compile time in PCRE and WREC.
7745f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    static const unsigned MAX_PATTERN_SIZE = 1024 * 1024; // parse() records PatternTooLarge when m_size exceeds this.
7755f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian};
7765f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
7775f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian/*
7785f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Yarr::parse():
7795f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
7805f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The parse method is passed a pattern to be parsed and a delegate upon which
7815f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * callbacks will be made to record the parsed tokens forming the regex.
7825f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Yarr::parse() returns null on success, or a const C string providing an error
7835f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * message where a parse error occurs.
7845f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
7855f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The Delegate must implement the following interface:
7865f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
7875f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void assertionBOL();
7885f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void assertionEOL();
7895f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void assertionWordBoundary(bool invert);
7905f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
7915f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void atomPatternCharacter(UChar ch);
7925f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
 *    void atomCharacterClassBegin(bool invert);
 *    void atomCharacterClassAtom(UChar ch);
 *    void atomCharacterClassRange(UChar begin, UChar end);
 *    void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert);
 *    void atomCharacterClassEnd();
7985f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void atomParenthesesSubpatternBegin(bool capture = true);
7995f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void atomParentheticalAssertionBegin(bool invert = false);
8005f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void atomParenthesesEnd();
8015f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void atomBackReference(unsigned subpatternId);
8025f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
8035f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void quantifyAtom(unsigned min, unsigned max, bool greedy);
8045f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
8055f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void disjunction();
8065f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
8075f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void regexBegin();
8085f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void regexEnd();
8095f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *    void regexError();
8105f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
 * Before any calls recording tokens are made, regexBegin() will be called on the
 * delegate once.  Once parsing is complete either regexEnd() or regexError() will
 * be called, as appropriate.
8145f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
8155f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * The regular expression is described by a sequence of assertion*() and atom*()
8165f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * callbacks to the delegate, describing the terms in the regular expression.
8175f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Following an atom a quantifyAtom() call may occur to indicate that the previous
8185f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atom should be quantified.  In the case of atoms described across multiple
8195f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * calls (parentheses and character classes) the call to quantifyAtom() will come
8205f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * after the call to the atom*End() method, never after atom*Begin().
8215f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
8225f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Character classes may either be described by a single call to
8235f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
8245f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * In the latter case, ...Begin() will be called, followed by a sequence of
8255f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
8265f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian *
8275f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * Sequences of atoms and assertions are broken into alternatives via calls to
8285f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * disjunction().  Assertions, atoms, and disjunctions emitted between calls to
8295f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
8305f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * atomParenthesesBegin() is passed a subpatternId.  In the case of a regular
8315f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * capturing subpattern, this will be the subpatternId associated with these
8325f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * parentheses, and will also by definition be the lowest subpatternId of these
 * parentheses and of any nested parentheses.  The atomParenthesesEnd() method
 * is passed the subpatternId of the last capturing subexpression nested within
 * these parentheses.  In the case of a capturing subpattern with no nested
 * capturing subpatterns, the same subpatternId will be passed to the begin and
 * end functions.  In the case of non-capturing subpatterns the subpatternId
 * passed to the begin method is also the first possible subpatternId that might
 * be nested within these parentheses.  If a set of non-capturing parentheses does
8405f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * not contain any capturing subpatterns, then the subpatternId passed to begin
8415f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian * will be greater than the subpatternId passed to end.
8425f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian */
8435f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
8445f1ab04193ad0130ca8204aadaceae083aca9881Feng Qiantemplate<class Delegate>
8455f1ab04193ad0130ca8204aadaceae083aca9881Feng Qianconst char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
8465f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian{
8475f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian    return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
8485f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian}
8495f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
8505f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian} } // namespace JSC::Yarr
8515f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
8525f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#endif
8535f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian
8545f1ab04193ad0130ca8204aadaceae083aca9881Feng Qian#endif // RegexParser_h
855