1/*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "IDBKeyPath.h"
28
29#if ENABLE(INDEXED_DATABASE)
30
31#include <wtf/ASCIICType.h>
32#include <wtf/dtoa.h>
33
34namespace WebCore {
35
36class IDBKeyPathLexer {
37public:
38    enum TokenType {
39        TokenLeftBracket,
40        TokenRightBracket,
41        TokenIdentifier,
42        TokenNumber,
43        TokenDot,
44        TokenEnd,
45        TokenError
46    };
47
48    explicit IDBKeyPathLexer(const String& s)
49        : m_string(s)
50        , m_ptr(s.characters())
51        , m_end(s.characters() + s.length())
52        , m_currentTokenType(TokenError)
53    {
54    }
55
56    TokenType currentTokenType() const { return m_currentTokenType; }
57
58    TokenType nextTokenType()
59    {
60        m_currentTokenType = lex(m_currentElement);
61        return m_currentTokenType;
62    }
63
64    const IDBKeyPathElement& currentElement() { return m_currentElement; }
65
66private:
67    TokenType lex(IDBKeyPathElement&);
68    TokenType lexIdentifier(IDBKeyPathElement&);
69    TokenType lexNumber(IDBKeyPathElement&);
70    IDBKeyPathElement m_currentElement;
71    String m_string;
72    const UChar* m_ptr;
73    const UChar* m_end;
74    TokenType m_currentTokenType;
75};
76
77IDBKeyPathLexer::TokenType IDBKeyPathLexer::lex(IDBKeyPathElement& element)
78{
79    while (m_ptr < m_end && isASCIISpace(*m_ptr))
80        ++m_ptr;
81
82    if (m_ptr >= m_end)
83        return TokenEnd;
84
85    ASSERT(m_ptr < m_end);
86    switch (*m_ptr) {
87    case '[':
88        ++m_ptr;
89        return TokenLeftBracket;
90    case ']':
91        ++m_ptr;
92        return TokenRightBracket;
93    case '.':
94        ++m_ptr;
95        return TokenDot;
96    case '0':
97    case '1':
98    case '2':
99    case '3':
100    case '4':
101    case '5':
102    case '6':
103    case '7':
104    case '8':
105    case '9':
106        return lexNumber(element);
107    default:
108        return lexIdentifier(element);
109    }
110    return TokenError;
111}
112
113static inline bool isSafeIdentifierStartCharacter(UChar c)
114{
115    return isASCIIAlpha(c) || (c == '_') || (c == '$');
116}
117
118static inline bool isSafeIdentifierCharacter(UChar c)
119{
120    return isASCIIAlphanumeric(c) || (c == '_') || (c == '$');
121}
122
123IDBKeyPathLexer::TokenType IDBKeyPathLexer::lexIdentifier(IDBKeyPathElement& element)
124{
125    const UChar* start = m_ptr;
126    if (m_ptr < m_end && isSafeIdentifierStartCharacter(*m_ptr))
127        ++m_ptr;
128    else
129        return TokenError;
130
131    while (m_ptr < m_end && isSafeIdentifierCharacter(*m_ptr))
132        ++m_ptr;
133
134    element.type = IDBKeyPathElement::IsNamed;
135    element.identifier = String(start, m_ptr - start);
136    return TokenIdentifier;
137}
138
139IDBKeyPathLexer::TokenType IDBKeyPathLexer::lexNumber(IDBKeyPathElement& element)
140{
141    if (m_ptr >= m_end)
142        return TokenError;
143
144    const UChar* start = m_ptr;
145    // [0-9]*
146    while (m_ptr < m_end && isASCIIDigit(*m_ptr))
147        ++m_ptr;
148
149    String numberAsString;
150    numberAsString = String(start, m_ptr - start);
151    bool ok = false;
152    unsigned number = numberAsString.toUIntStrict(&ok);
153    if (!ok)
154        return TokenError;
155
156    element.type = IDBKeyPathElement::IsIndexed;
157    element.index = number;
158    return TokenNumber;
159}
160
161void IDBParseKeyPath(const String& keyPath, Vector<IDBKeyPathElement>& elements, IDBKeyPathParseError& error)
162{
163    // This is a simplified parser loosely based on LiteralParser.
164    // An IDBKeyPath is defined as a sequence of:
165    // identifierA{.identifierB{[numeric_value]}
166    // where "{}" represents an optional part
167    // The basic state machine is:
168    // Start => {Identifier, Array}
169    // Identifier => {Dot, Array, End}
170    // Array => {Start, Dot, End}
171    // Dot => {Identifier}
172    // It bails out as soon as it finds an error, but doesn't discard the bits it managed to parse.
173    enum ParserState { Identifier, Array, Dot, End };
174
175    IDBKeyPathLexer lexer(keyPath);
176    IDBKeyPathLexer::TokenType tokenType = lexer.nextTokenType();
177    ParserState state;
178    if (tokenType == IDBKeyPathLexer::TokenIdentifier)
179        state = Identifier;
180    else if (tokenType == IDBKeyPathLexer::TokenLeftBracket)
181        state = Array;
182    else if (tokenType == IDBKeyPathLexer::TokenEnd)
183        state = End;
184    else {
185        error = IDBKeyPathParseErrorStart;
186        return;
187    }
188
189    while (1) {
190        switch (state) {
191        case Identifier : {
192            IDBKeyPathLexer::TokenType tokenType = lexer.currentTokenType();
193            ASSERT(tokenType == IDBKeyPathLexer::TokenIdentifier);
194
195            IDBKeyPathElement element = lexer.currentElement();
196            ASSERT(element.type == IDBKeyPathElement::IsNamed);
197            elements.append(element);
198
199            tokenType = lexer.nextTokenType();
200            if (tokenType == IDBKeyPathLexer::TokenDot)
201                state = Dot;
202            else if (tokenType == IDBKeyPathLexer::TokenLeftBracket)
203                state = Array;
204            else if (tokenType == IDBKeyPathLexer::TokenEnd)
205                state = End;
206            else {
207                error = IDBKeyPathParseErrorIdentifier;
208                return;
209            }
210            break;
211        }
212        case Array : {
213            IDBKeyPathLexer::TokenType tokenType = lexer.currentTokenType();
214            ASSERT(tokenType == IDBKeyPathLexer::TokenLeftBracket);
215
216            tokenType = lexer.nextTokenType();
217            if (tokenType != IDBKeyPathLexer::TokenNumber) {
218                error = IDBKeyPathParseErrorArrayIndex;
219                return;
220            }
221
222            ASSERT(tokenType == IDBKeyPathLexer::TokenNumber);
223            IDBKeyPathElement element = lexer.currentElement();
224            ASSERT(element.type == IDBKeyPathElement::IsIndexed);
225            elements.append(element);
226
227            tokenType = lexer.nextTokenType();
228            if (tokenType != IDBKeyPathLexer::TokenRightBracket) {
229                error = IDBKeyPathParseErrorArrayIndex;
230                return;
231            }
232
233            tokenType = lexer.nextTokenType();
234            if (tokenType == IDBKeyPathLexer::TokenDot)
235                state = Dot;
236            else if (tokenType == IDBKeyPathLexer::TokenLeftBracket)
237                state = Array;
238            else if (tokenType == IDBKeyPathLexer::TokenEnd)
239                state = End;
240            else {
241                error = IDBKeyPathParseErrorAfterArray;
242                return;
243            }
244            break;
245        }
246        case Dot: {
247            IDBKeyPathLexer::TokenType tokenType = lexer.currentTokenType();
248            ASSERT(tokenType == IDBKeyPathLexer::TokenDot);
249
250            tokenType = lexer.nextTokenType();
251            if (tokenType != IDBKeyPathLexer::TokenIdentifier) {
252                error = IDBKeyPathParseErrorDot;
253                return;
254            }
255
256            state = Identifier;
257            break;
258        }
259        case End: {
260            error = IDBKeyPathParseErrorNone;
261            return;
262        }
263        }
264    }
265}
266
267} // namespace WebCore
268
269#endif // ENABLE(INDEXED_DATABASE)
270