1/*
2 * Copyright (c) 2013, Opera Software ASA. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Neither the name of Opera Software ASA nor the names of its
13 *    contributors may be used to endorse or promote products derived
14 *    from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
27 * OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#ifndef VTTScanner_h
31#define VTTScanner_h
32
33#include "platform/ParsingUtilities.h"
34#include "wtf/text/WTFString.h"
35
36namespace WebCore {
37
38// Helper class for "scanning" an input string and performing parsing of
39// "micro-syntax"-like constructs.
40//
// There are two primary operations: match and scan.
42//
43// The 'match' operation matches an explicitly or implicitly specified sequence
44// against the characters ahead of the current input pointer, and returns true
45// if the sequence can be matched.
46//
// The 'scan' operation performs a 'match', and if the match is successful it
// advances the input pointer past the matched sequence.
49class VTTScanner {
50    WTF_MAKE_NONCOPYABLE(VTTScanner);
51public:
52    explicit VTTScanner(const String& line);
53
54    typedef const LChar* Position;
55
56    class Run {
57    public:
58        Run(Position start, Position end, bool is8Bit)
59            : m_start(start), m_end(end), m_is8Bit(is8Bit) { }
60
61        Position start() const { return m_start; }
62        Position end() const { return m_end; }
63
64        bool isEmpty() const { return m_start == m_end; }
65        size_t length() const;
66
67    private:
68        Position m_start;
69        Position m_end;
70        bool m_is8Bit;
71    };
72
73    // Check if the input pointer points at the specified position.
74    bool isAt(Position checkPosition) const { return position() == checkPosition; }
75    // Check if the input pointer points at the end of the input.
76    bool isAtEnd() const { return position() == end(); }
77    // Match the character |c| against the character at the input pointer (~lookahead).
78    bool match(char c) const { return !isAtEnd() && currentChar() == c; }
79    // Scan the character |c|.
80    bool scan(char);
81    // Scan the first |charactersCount| characters of the string |characters|.
82    bool scan(const LChar* characters, size_t charactersCount);
83
84    // Scan the literal |characters|.
85    template<unsigned charactersCount>
86    bool scan(const char (&characters)[charactersCount]);
87
88    // Skip (advance the input pointer) as long as the specified
89    // |characterPredicate| returns true, and the input pointer is not passed
90    // the end of the input.
91    template<bool characterPredicate(UChar)>
92    void skipWhile();
93
94    // Like skipWhile, but using a negated predicate.
95    template<bool characterPredicate(UChar)>
96    void skipUntil();
97
98    // Return the run of characters for which the specified
99    // |characterPredicate| returns true. The start of the run will be the
100    // current input pointer.
101    template<bool characterPredicate(UChar)>
102    Run collectWhile();
103
104    // Like collectWhile, but using a negated predicate.
105    template<bool characterPredicate(UChar)>
106    Run collectUntil();
107
108    // Scan the string |toMatch|, using the specified |run| as the sequence to
109    // match against.
110    bool scanRun(const Run&, const String& toMatch);
111
112    // Skip to the end of the specified |run|.
113    void skipRun(const Run&);
114
115    // Return the String made up of the characters in |run|, and advance the
116    // input pointer to the end of the run.
117    String extractString(const Run&);
118
119    // Return a String constructed from the rest of the input (between input
120    // pointer and end of input), and advance the input pointer accordingly.
121    String restOfInputAsString();
122
123    // Scan a set of ASCII digits from the input. Return the number of digits
124    // scanned, and set |number| to the computed value. If the digits make up a
125    // number that does not fit the 'int' type, |number| is set to INT_MAX.
126    // Note: Does not handle sign.
127    unsigned scanDigits(int& number);
128
129    // Scan a floating point value on one of the forms: \d+\.? \d+\.\d+ \.\d+
130    bool scanFloat(float& number);
131
132protected:
133    Position position() const { return m_data.characters8; }
134    Position end() const { return m_end.characters8; }
135    void seekTo(Position);
136    UChar currentChar() const;
137    void advance(unsigned amount = 1);
138    // Adapt a UChar-predicate to an LChar-predicate.
139    // (For use with skipWhile/Until from ParsingUtilities.h).
140    template<bool characterPredicate(UChar)>
141    static inline bool LCharPredicateAdapter(LChar c) { return characterPredicate(c); }
142    union {
143        const LChar* characters8;
144        const UChar* characters16;
145    } m_data;
146    union {
147        const LChar* characters8;
148        const UChar* characters16;
149    } m_end;
150    bool m_is8Bit;
151};
152
153inline size_t VTTScanner::Run::length() const
154{
155    if (m_is8Bit)
156        return m_end - m_start;
157    return reinterpret_cast<const UChar*>(m_end) - reinterpret_cast<const UChar*>(m_start);
158}
159
160template<unsigned charactersCount>
161inline bool VTTScanner::scan(const char (&characters)[charactersCount])
162{
163    return scan(reinterpret_cast<const LChar*>(characters), charactersCount - 1);
164}
165
166template<bool characterPredicate(UChar)>
167inline void VTTScanner::skipWhile()
168{
169    if (m_is8Bit)
170        ::skipWhile<LChar, LCharPredicateAdapter<characterPredicate> >(m_data.characters8, m_end.characters8);
171    else
172        ::skipWhile<UChar, characterPredicate>(m_data.characters16, m_end.characters16);
173}
174
175template<bool characterPredicate(UChar)>
176inline void VTTScanner::skipUntil()
177{
178    if (m_is8Bit)
179        ::skipUntil<LChar, LCharPredicateAdapter<characterPredicate> >(m_data.characters8, m_end.characters8);
180    else
181        ::skipUntil<UChar, characterPredicate>(m_data.characters16, m_end.characters16);
182}
183
184template<bool characterPredicate(UChar)>
185inline VTTScanner::Run VTTScanner::collectWhile()
186{
187    if (m_is8Bit) {
188        const LChar* current = m_data.characters8;
189        ::skipWhile<LChar, LCharPredicateAdapter<characterPredicate> >(current, m_end.characters8);
190        return Run(position(), current, m_is8Bit);
191    }
192    const UChar* current = m_data.characters16;
193    ::skipWhile<UChar, characterPredicate>(current, m_end.characters16);
194    return Run(position(), reinterpret_cast<Position>(current), m_is8Bit);
195}
196
197template<bool characterPredicate(UChar)>
198inline VTTScanner::Run VTTScanner::collectUntil()
199{
200    if (m_is8Bit) {
201        const LChar* current = m_data.characters8;
202        ::skipUntil<LChar, LCharPredicateAdapter<characterPredicate> >(current, m_end.characters8);
203        return Run(position(), current, m_is8Bit);
204    }
205    const UChar* current = m_data.characters16;
206    ::skipUntil<UChar, characterPredicate>(current, m_end.characters16);
207    return Run(position(), reinterpret_cast<Position>(current), m_is8Bit);
208}
209
210inline void VTTScanner::seekTo(Position position)
211{
212    ASSERT(position <= end());
213    m_data.characters8 = position;
214}
215
216inline UChar VTTScanner::currentChar() const
217{
218    ASSERT(position() < end());
219    return m_is8Bit ? *m_data.characters8 : *m_data.characters16;
220}
221
222inline void VTTScanner::advance(unsigned amount)
223{
224    ASSERT(position() < end());
225    if (m_is8Bit)
226        m_data.characters8 += amount;
227    else
228        m_data.characters16 += amount;
229}
230
231}
232
233#endif
234