1/*
2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2008 Eric Seidel <eric@webkit.org>
5 * Copyright (C) 2009 - 2010  Torch Mobile (Beijing) Co. Ltd. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB.  If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 */
22
23#ifndef CSSTokenizer_h
24#define CSSTokenizer_h
25
#include "wtf/Noncopyable.h"
#include "wtf/OwnPtr.h"
#include "wtf/Vector.h"
#include "wtf/text/WTFString.h"
#include <algorithm>
29
30namespace blink {
31
32class BisonCSSParser;
33struct CSSParserLocation;
34struct CSSParserString;
35
36class CSSTokenizer {
37    WTF_MAKE_NONCOPYABLE(CSSTokenizer);
38public:
39    // FIXME: This should not be needed but there are still some ties between the 2 classes.
40    friend class BisonCSSParser;
41
42    CSSTokenizer(BisonCSSParser& parser)
43        : m_parser(parser)
44        , m_parsedTextPrefixLength(0)
45        , m_parsedTextSuffixLength(0)
46        , m_parsingMode(NormalMode)
47        , m_is8BitSource(false)
48        , m_length(0)
49        , m_token(0)
50        , m_lineNumber(0)
51        , m_tokenStartLineNumber(0)
52        , m_internal(true)
53    {
54        m_tokenStart.ptr8 = 0;
55    }
56
57    void setupTokenizer(const char* prefix, unsigned prefixLength, const String&, const char* suffix, unsigned suffixLength);
58
59    CSSParserLocation currentLocation();
60
61    inline int lex(void* yylval) { return (this->*m_lexFunc)(yylval); }
62
63    inline unsigned safeUserStringTokenOffset()
64    {
65        return std::min(tokenStartOffset(), static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength)) - m_parsedTextPrefixLength;
66    }
67
68    bool is8BitSource() const { return m_is8BitSource; }
69
70    // FIXME: These 2 functions should be private so that we don't need the definitions below.
71    template <typename CharacterType>
72    inline CharacterType* tokenStart();
73
74    inline unsigned tokenStartOffset();
75
76private:
77    UChar* allocateStringBuffer16(size_t len);
78
79    template <typename CharacterType>
80    inline CharacterType*& currentCharacter();
81
82    template <typename CharacterType>
83    inline CharacterType* dataStart();
84
85    template <typename CharacterType>
86    inline void setTokenStart(CharacterType*);
87
88    template <typename CharacterType>
89    inline bool isIdentifierStart();
90
91    template <typename CharacterType>
92    inline CSSParserLocation tokenLocation();
93
94    template <typename CharacterType>
95    static unsigned parseEscape(CharacterType*&);
96    template <typename DestCharacterType>
97    static inline void UnicodeToChars(DestCharacterType*&, unsigned);
98
99    template <typename SrcCharacterType, typename DestCharacterType>
100    static inline bool parseIdentifierInternal(SrcCharacterType*&, DestCharacterType*&, bool&);
101    template <typename SrcCharacterType>
102    static size_t peekMaxIdentifierLen(SrcCharacterType*);
103    template <typename CharacterType>
104    inline void parseIdentifier(CharacterType*&, CSSParserString&, bool&);
105
106    template <typename SrcCharacterType>
107    static size_t peekMaxStringLen(SrcCharacterType*, UChar quote);
108    template <typename SrcCharacterType, typename DestCharacterType>
109    static inline bool parseStringInternal(SrcCharacterType*&, DestCharacterType*&, UChar);
110    template <typename CharacterType>
111    inline void parseString(CharacterType*&, CSSParserString& resultString, UChar);
112
113    template <typename CharacterType>
114    inline bool findURI(CharacterType*& start, CharacterType*& end, UChar& quote);
115    template <typename SrcCharacterType>
116    static size_t peekMaxURILen(SrcCharacterType*, UChar quote);
117    template <typename SrcCharacterType, typename DestCharacterType>
118    static inline bool parseURIInternal(SrcCharacterType*&, DestCharacterType*&, UChar quote);
119    template <typename CharacterType>
120    inline void parseURI(CSSParserString&);
121
122    template <typename CharacterType>
123    inline bool parseUnicodeRange();
124    template <typename CharacterType>
125    bool parseNthChild();
126    template <typename CharacterType>
127    bool parseNthChildExtra();
128    template <typename CharacterType>
129    inline bool detectFunctionTypeToken(int);
130    template <typename CharacterType>
131    inline void detectMediaQueryToken(int);
132    template <typename CharacterType>
133    inline void detectNumberToken(CharacterType*, int);
134    template <typename CharacterType>
135    inline void detectDashToken(int);
136    template <typename CharacterType>
137    inline void detectAtToken(int, bool);
138    template <typename CharacterType>
139    inline void detectSupportsToken(int);
140
141    template <typename SourceCharacterType>
142    int realLex(void* yylval);
143
144    BisonCSSParser& m_parser;
145
146    size_t m_parsedTextPrefixLength;
147    size_t m_parsedTextSuffixLength;
148
149    enum ParsingMode {
150        NormalMode,
151        MediaQueryMode,
152        SupportsMode,
153        NthChildMode
154    };
155
156    ParsingMode m_parsingMode;
157    bool m_is8BitSource;
158    OwnPtr<LChar[]> m_dataStart8;
159    OwnPtr<UChar[]> m_dataStart16;
160    LChar* m_currentCharacter8;
161    UChar* m_currentCharacter16;
162
163    // During parsing of an ASCII stylesheet we might locate escape
164    // sequences that expand into UTF-16 code points. Strings,
165    // identifiers and URIs containing such escape sequences are
166    // stored in m_cssStrings16 so that we don't have to store the
167    // whole stylesheet as UTF-16.
168    Vector<OwnPtr<UChar[]> > m_cssStrings16;
169    union {
170        LChar* ptr8;
171        UChar* ptr16;
172    } m_tokenStart;
173    unsigned m_length;
174    int m_token;
175    int m_lineNumber;
176    int m_tokenStartLineNumber;
177
178    // FIXME: This boolean is misnamed. Also it would be nice if we could consolidate it
179    // with the CSSParserMode logic to determine if internal properties are allowed.
180    bool m_internal;
181
182    int (CSSTokenizer::*m_lexFunc)(void*);
183};
184
185inline unsigned CSSTokenizer::tokenStartOffset()
186{
187    if (is8BitSource())
188        return m_tokenStart.ptr8 - m_dataStart8.get();
189    return m_tokenStart.ptr16 - m_dataStart16.get();
190}
191
192template <>
193inline LChar* CSSTokenizer::tokenStart<LChar>()
194{
195    return m_tokenStart.ptr8;
196}
197
198template <>
199inline UChar* CSSTokenizer::tokenStart<UChar>()
200{
201    return m_tokenStart.ptr16;
202}
203
204} // namespace blink
205
206#endif // CSSTokenizer_h
207