1/*
2 * Copyright (C) 2011 Google Inc.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#ifndef VTTParser_h
32#define VTTParser_h
33
34#include "core/HTMLNames.h"
35#include "core/dom/DocumentFragment.h"
36#include "core/html/parser/TextResourceDecoder.h"
37#include "core/html/track/vtt/BufferedLineReader.h"
38#include "core/html/track/vtt/VTTCue.h"
39#include "core/html/track/vtt/VTTRegion.h"
40#include "core/html/track/vtt/VTTTokenizer.h"
41#include "platform/heap/Handle.h"
42#include "wtf/PassOwnPtr.h"
43#include "wtf/text/StringBuilder.h"
44
45namespace blink {
46
47class Document;
48class VTTScanner;
49
// Callback interface handed to VTTParser::create(). Every notification is
// pure virtual, so a concrete client has to implement all three.
class VTTParserClient {
public:
    // Virtual so that a client can be destroyed through a VTTParserClient*.
    virtual ~VTTParserClient() {}

    // Notification hooks. Judging by their names they report newly parsed
    // cues, newly parsed regions, and an unparsable file respectively; the
    // call sites are not part of this header.
    virtual void newCuesParsed() = 0;
    virtual void newRegionsParsed() = 0;
    virtual void fileFailedToParse() = 0;
};
58
59class VTTParser FINAL : public NoBaseWillBeGarbageCollectedFinalized<VTTParser> {
60public:
61    enum ParseState {
62        Initial,
63        Header,
64        Id,
65        TimingsAndSettings,
66        CueText,
67        BadCue
68    };
69
70    static PassOwnPtrWillBeRawPtr<VTTParser> create(VTTParserClient* client, Document& document)
71    {
72        return adoptPtrWillBeNoop(new VTTParser(client, document));
73    }
74
75    static inline bool isRecognizedTag(const AtomicString& tagName)
76    {
77        return tagName == HTMLNames::iTag
78            || tagName == HTMLNames::bTag
79            || tagName == HTMLNames::uTag
80            || tagName == HTMLNames::rubyTag
81            || tagName == HTMLNames::rtTag;
82    }
83    static inline bool isASpace(UChar c)
84    {
85        // WebVTT space characters are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), U+000A LINE FEED (LF), U+000C FORM FEED (FF), and U+000D CARRIAGE RETURN    (CR).
86        return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
87    }
88    static inline bool isValidSettingDelimiter(UChar c)
89    {
90        // ... a WebVTT cue consists of zero or more of the following components, in any order, separated from each other by one or more
91        // U+0020 SPACE characters or U+0009 CHARACTER TABULATION (tab) characters.
92        return c == ' ' || c == '\t';
93    }
94    static bool collectTimeStamp(const String&, double& timeStamp);
95
96    // Useful functions for parsing percentage settings.
97    static bool parseFloatPercentageValue(VTTScanner& valueScanner, float& percentage);
98    static bool parseFloatPercentageValuePair(VTTScanner&, char, FloatPoint&);
99
100    // Create the DocumentFragment representation of the WebVTT cue text.
101    static PassRefPtrWillBeRawPtr<DocumentFragment> createDocumentFragmentFromCueText(Document&, const String&);
102
103    // Input data to the parser to parse.
104    void parseBytes(const char* data, unsigned length);
105    void flush();
106
107    // Transfers ownership of last parsed cues to caller.
108    void getNewCues(WillBeHeapVector<RefPtrWillBeMember<VTTCue> >&);
109    void getNewRegions(WillBeHeapVector<RefPtrWillBeMember<VTTRegion> >&);
110
111    void trace(Visitor*);
112
113private:
114    VTTParser(VTTParserClient*, Document&);
115
116    RawPtrWillBeMember<Document> m_document;
117    ParseState m_state;
118
119    void parse();
120    void flushPendingCue();
121    bool hasRequiredFileIdentifier(const String& line);
122    ParseState collectCueId(const String&);
123    ParseState collectTimingsAndSettings(const String&);
124    ParseState collectCueText(const String&);
125    ParseState recoverCue(const String&);
126    ParseState ignoreBadCue(const String&);
127
128    void createNewCue();
129    void resetCueValues();
130
131    void collectMetadataHeader(const String&);
132    void createNewRegion(const String& headerValue);
133
134    static bool collectTimeStamp(VTTScanner& input, double& timeStamp);
135
136    BufferedLineReader m_lineReader;
137    OwnPtr<TextResourceDecoder> m_decoder;
138    AtomicString m_currentId;
139    double m_currentStartTime;
140    double m_currentEndTime;
141    StringBuilder m_currentContent;
142    String m_currentSettings;
143
144    VTTParserClient* m_client;
145
146    WillBeHeapVector<RefPtrWillBeMember<VTTCue> > m_cueList;
147
148    WillBeHeapVector<RefPtrWillBeMember<VTTRegion> > m_regionList;
149};
150
151} // namespace blink
152
153#endif
154