1/*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "modules/indexeddb/IDBKeyPath.h"
28
29#include "wtf/ASCIICType.h"
30#include "wtf/dtoa.h"
31#include "wtf/unicode/CharacterNames.h"
32#include "wtf/unicode/Unicode.h"
33
34using namespace WTF::Unicode;
35
36namespace blink {
37
38class IDBKeyPathLexer {
39public:
40    enum TokenType {
41        TokenIdentifier,
42        TokenDot,
43        TokenEnd,
44        TokenError
45    };
46
47    explicit IDBKeyPathLexer(const String& s)
48        : m_string(s)
49        , m_length(s.length())
50        , m_index(0)
51        , m_currentTokenType(TokenError)
52    {
53    }
54
55    TokenType currentTokenType() const { return m_currentTokenType; }
56
57    TokenType nextTokenType()
58    {
59        m_currentTokenType = lex(m_currentElement);
60        return m_currentTokenType;
61    }
62
63    const String& currentElement() { return m_currentElement; }
64
65private:
66    TokenType lex(String&);
67    TokenType lexIdentifier(String&);
68    String m_currentElement;
69    const String m_string;
70    const unsigned m_length;
71    unsigned m_index;
72    TokenType m_currentTokenType;
73};
74
75IDBKeyPathLexer::TokenType IDBKeyPathLexer::lex(String& element)
76{
77    if (m_index >= m_length)
78        return TokenEnd;
79    ASSERT(m_index < m_length);
80
81    if (m_string[m_index] == '.') {
82        ++m_index;
83        return TokenDot;
84    }
85    return lexIdentifier(element);
86}
87
88namespace {
89
90using namespace WTF::Unicode;
91
92// The following correspond to grammar in ECMA-262.
93const uint32_t unicodeLetter = Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other | Number_Letter;
94const uint32_t unicodeCombiningMark = Mark_NonSpacing | Mark_SpacingCombining;
95const uint32_t unicodeDigit = Number_DecimalDigit;
96const uint32_t unicodeConnectorPunctuation = Punctuation_Connector;
97
98static inline bool isIdentifierStartCharacter(UChar c)
99{
100    return (category(c) & unicodeLetter) || (c == '$') || (c == '_');
101}
102
103static inline bool isIdentifierCharacter(UChar c)
104{
105    return (category(c) & (unicodeLetter | unicodeCombiningMark | unicodeDigit | unicodeConnectorPunctuation)) || (c == '$') || (c == '_') || (c == zeroWidthNonJoiner) || (c == zeroWidthJoiner);
106}
107
108} // namespace
109
110IDBKeyPathLexer::TokenType IDBKeyPathLexer::lexIdentifier(String& element)
111{
112    unsigned start = m_index;
113    if (m_index < m_length && isIdentifierStartCharacter(m_string[m_index]))
114        ++m_index;
115    else
116        return TokenError;
117
118    while (m_index < m_length && isIdentifierCharacter(m_string[m_index]))
119        ++m_index;
120
121    element = m_string.substring(start, m_index - start);
122    return TokenIdentifier;
123}
124
125bool IDBIsValidKeyPath(const String& keyPath)
126{
127    IDBKeyPathParseError error;
128    Vector<String> keyPathElements;
129    IDBParseKeyPath(keyPath, keyPathElements, error);
130    return error == IDBKeyPathParseErrorNone;
131}
132
133void IDBParseKeyPath(const String& keyPath, Vector<String>& elements, IDBKeyPathParseError& error)
134{
135    // IDBKeyPath ::= EMPTY_STRING | identifier ('.' identifier)*
136    // The basic state machine is:
137    //   Start => {Identifier, End}
138    //   Identifier => {Dot, End}
139    //   Dot => {Identifier}
140    // It bails out as soon as it finds an error, but doesn't discard the bits it managed to parse.
141    enum ParserState { Identifier, Dot, End };
142
143    IDBKeyPathLexer lexer(keyPath);
144    IDBKeyPathLexer::TokenType tokenType = lexer.nextTokenType();
145    ParserState state;
146    if (tokenType == IDBKeyPathLexer::TokenIdentifier)
147        state = Identifier;
148    else if (tokenType == IDBKeyPathLexer::TokenEnd)
149        state = End;
150    else {
151        error = IDBKeyPathParseErrorStart;
152        return;
153    }
154
155    while (1) {
156        switch (state) {
157        case Identifier : {
158            IDBKeyPathLexer::TokenType tokenType = lexer.currentTokenType();
159            ASSERT(tokenType == IDBKeyPathLexer::TokenIdentifier);
160
161            String element = lexer.currentElement();
162            elements.append(element);
163
164            tokenType = lexer.nextTokenType();
165            if (tokenType == IDBKeyPathLexer::TokenDot)
166                state = Dot;
167            else if (tokenType == IDBKeyPathLexer::TokenEnd)
168                state = End;
169            else {
170                error = IDBKeyPathParseErrorIdentifier;
171                return;
172            }
173            break;
174        }
175        case Dot: {
176            IDBKeyPathLexer::TokenType tokenType = lexer.currentTokenType();
177            ASSERT(tokenType == IDBKeyPathLexer::TokenDot);
178
179            tokenType = lexer.nextTokenType();
180            if (tokenType == IDBKeyPathLexer::TokenIdentifier)
181                state = Identifier;
182            else {
183                error = IDBKeyPathParseErrorDot;
184                return;
185            }
186            break;
187        }
188        case End: {
189            error = IDBKeyPathParseErrorNone;
190            return;
191        }
192        }
193    }
194}
195
196IDBKeyPath::IDBKeyPath(const String& string)
197    : m_type(StringType)
198    , m_string(string)
199{
200    ASSERT(!m_string.isNull());
201}
202
203IDBKeyPath::IDBKeyPath(const Vector<String>& array)
204    : m_type(ArrayType)
205    , m_array(array)
206{
207#if ENABLE(ASSERT)
208    for (size_t i = 0; i < m_array.size(); ++i)
209        ASSERT(!m_array[i].isNull());
210#endif
211}
212
213bool IDBKeyPath::isValid() const
214{
215    switch (m_type) {
216    case NullType:
217        return false;
218
219    case StringType:
220        return IDBIsValidKeyPath(m_string);
221
222    case ArrayType:
223        if (m_array.isEmpty())
224            return false;
225        for (size_t i = 0; i < m_array.size(); ++i) {
226            if (!IDBIsValidKeyPath(m_array[i]))
227                return false;
228        }
229        return true;
230    }
231    ASSERT_NOT_REACHED();
232    return false;
233}
234
235bool IDBKeyPath::operator==(const IDBKeyPath& other) const
236{
237    if (m_type != other.m_type)
238        return false;
239
240    switch (m_type) {
241    case NullType:
242        return true;
243    case StringType:
244        return m_string == other.m_string;
245    case ArrayType:
246        return m_array == other.m_array;
247    }
248    ASSERT_NOT_REACHED();
249    return false;
250}
251
252} // namespace blink
253