1/*
2 * Copyright (C) 2008 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef Parser_h
27#define Parser_h
28
29#include <wtf/Platform.h>
30
31#if ENABLE(WREC)
32
33#include "Escapes.h"
34#include "Quantifier.h"
35#include "UString.h"
36#include "WRECGenerator.h"
37#include <wtf/ASCIICType.h>
38
39namespace JSC { namespace WREC {
40
41    struct CharacterClass;
42
43    class Parser {
44    typedef Generator::JumpList JumpList;
45    typedef Generator::ParenthesesType ParenthesesType;
46
47    friend class SavedState;
48
49    public:
50        Parser(const UString& pattern, bool ignoreCase, bool multiline)
51            : m_generator(*this)
52            , m_data(pattern.data())
53            , m_size(pattern.size())
54            , m_ignoreCase(ignoreCase)
55            , m_multiline(multiline)
56        {
57            reset();
58        }
59
60        Generator& generator() { return m_generator; }
61
62        bool ignoreCase() const { return m_ignoreCase; }
63        bool multiline() const { return m_multiline; }
64
65        void recordSubpattern() { ++m_numSubpatterns; }
66        unsigned numSubpatterns() const { return m_numSubpatterns; }
67
68        const char* error() const { return m_error; }
69        const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; }
70
71        void parsePattern(JumpList& failures)
72        {
73            reset();
74
75            parseDisjunction(failures);
76
77            if (peek() != EndOfPattern)
78                setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it.
79        }
80
81        void parseDisjunction(JumpList& failures);
82        void parseAlternative(JumpList& failures);
83        bool parseTerm(JumpList& failures);
84        bool parseNonCharacterEscape(JumpList& failures, const Escape&);
85        bool parseParentheses(JumpList& failures);
86        bool parseCharacterClass(JumpList& failures);
87        bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert);
88        bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId);
89
90    private:
91        class SavedState {
92        public:
93            SavedState(Parser& parser)
94                : m_parser(parser)
95                , m_index(parser.m_index)
96            {
97            }
98
99            void restore()
100            {
101                m_parser.m_index = m_index;
102            }
103
104        private:
105            Parser& m_parser;
106            unsigned m_index;
107        };
108
109        void reset()
110        {
111            m_index = 0;
112            m_numSubpatterns = 0;
113            m_error = 0;
114        }
115
116        void setError(const char* error)
117        {
118            if (m_error)
119                return;
120            m_error = error;
121        }
122
123        int peek()
124        {
125            if (m_index >= m_size)
126                return EndOfPattern;
127            return m_data[m_index];
128        }
129
130        int consume()
131        {
132            if (m_index >= m_size)
133                return EndOfPattern;
134            return m_data[m_index++];
135        }
136
137        bool peekIsDigit()
138        {
139            return WTF::isASCIIDigit(peek());
140        }
141
142        unsigned peekDigit()
143        {
144            ASSERT(peekIsDigit());
145            return peek() - '0';
146        }
147
148        unsigned consumeDigit()
149        {
150            ASSERT(peekIsDigit());
151            return consume() - '0';
152        }
153
154        unsigned consumeNumber()
155        {
156            int n = consumeDigit();
157            while (peekIsDigit()) {
158                n *= 10;
159                n += consumeDigit();
160            }
161            return n;
162        }
163
164        int consumeHex(int count)
165        {
166            int n = 0;
167            while (count--) {
168                if (!WTF::isASCIIHexDigit(peek()))
169                    return -1;
170                n = (n << 4) | WTF::toASCIIHexValue(consume());
171            }
172            return n;
173        }
174
175        unsigned consumeOctal()
176        {
177            unsigned n = 0;
178            while (n < 32 && WTF::isASCIIOctalDigit(peek()))
179                n = n * 8 + consumeDigit();
180            return n;
181        }
182
183        ALWAYS_INLINE Quantifier consumeGreedyQuantifier();
184        Quantifier consumeQuantifier();
185        Escape consumeEscape(bool inCharacterClass);
186        ParenthesesType consumeParenthesesType();
187
188        static const int EndOfPattern = -1;
189
190        // Error messages.
191        static const char* QuantifierOutOfOrder;
192        static const char* QuantifierWithoutAtom;
193        static const char* ParenthesesUnmatched;
194        static const char* ParenthesesTypeInvalid;
195        static const char* ParenthesesNotSupported;
196        static const char* CharacterClassUnmatched;
197        static const char* CharacterClassOutOfOrder;
198        static const char* EscapeUnterminated;
199
200        Generator m_generator;
201        const UChar* m_data;
202        unsigned m_size;
203        unsigned m_index;
204        bool m_ignoreCase;
205        bool m_multiline;
206        unsigned m_numSubpatterns;
207        const char* m_error;
208    };
209
210} } // namespace JSC::WREC
211
212#endif // ENABLE(WREC)
213
214#endif // Parser_h
215