1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
6#define MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
7
8#include <deque>
9#include <map>
10#include <set>
11#include <string>
12
13#include "base/memory/scoped_ptr.h"
14#include "media/base/media_export.h"
15#include "media/base/media_log.h"
16#include "media/base/stream_parser.h"
17#include "media/base/stream_parser_buffer.h"
18#include "media/formats/webm/webm_parser.h"
19#include "media/formats/webm/webm_tracks_parser.h"
20
21namespace media {
22
// WebMParserClient implementation that parses WebM cluster elements (see
// Parse()) and hands out the resulting audio, video and text buffers via
// Get{Audio,Video,Text}Buffers().
23class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
24 public:
25  typedef StreamParser::TrackId TrackId;
  // Queue of ref-counted parsed buffers.
26  typedef std::deque<scoped_refptr<StreamParserBuffer> > BufferQueue;
  // Maps a TrackId to a read-only BufferQueue; returned by GetTextBuffers().
27  typedef std::map<TrackId, const BufferQueue> TextBufferQueueMap;
28
29  // Arbitrarily-chosen numbers to estimate the duration of a buffer if none is
30  // set and there is not enough information to get a better estimate.
31  // TODO(wolenetz/acolwell): Parse audio codebook to determine missing audio
32  // frame durations. See http://crbug.com/351166.
33  enum {
34    kDefaultAudioBufferDurationInMs = 23,  // Common 1k samples @44.1kHz
35    kDefaultVideoBufferDurationInMs = 42  // Low 24fps to reduce stalls
36  };
37
38 private:
39  // Helper class that manages per-track state.
40  class Track {
41   public:
    // NOTE(review): constructor is defined out of line; the arguments
    // presumably initialize the same-named members below -- confirm.
42    Track(int track_num,
43          bool is_video,
44          base::TimeDelta default_duration,
45          const LogCB& log_cb);
46    ~Track();
47
    // Returns |track_num_|.
48    int track_num() const { return track_num_; }
49
50    // If a buffer is currently held aside pending duration calculation, returns
51    // its decode timestamp. Otherwise, returns kInfiniteDuration().
52    DecodeTimestamp GetReadyUpperBound();
53
54    // Prepares |ready_buffers_| for retrieval. Prior to calling,
55    // |ready_buffers_| must be empty. Moves all |buffers_| with decode
56    // timestamp before |before_timestamp| to |ready_buffers_|, preserving their
57    // order.
58    void ExtractReadyBuffers(const DecodeTimestamp before_timestamp);
59
    // Read-only access to the buffers last moved by ExtractReadyBuffers().
60    const BufferQueue& ready_buffers() const { return ready_buffers_; }
61
62    // If |last_added_buffer_missing_duration_| is set, updates its duration
63    // relative to |buffer|'s timestamp, and adds it to |buffers_| and unsets
64    // |last_added_buffer_missing_duration_|. Then, if |buffer| is missing
65    // duration, saves |buffer| into |last_added_buffer_missing_duration_|, or
66    // otherwise adds |buffer| to |buffers_|.
    // NOTE(review): the meaning of the bool result is not stated here;
    // presumably false when QueueBuffer() rejects a buffer -- confirm.
67    bool AddBuffer(const scoped_refptr<StreamParserBuffer>& buffer);
68
69    // If |last_added_buffer_missing_duration_| is set, updates its duration to
70    // be non-kNoTimestamp() value of |estimated_next_frame_duration_| or an
71    // arbitrary default, then adds it to |buffers_| and unsets
72    // |last_added_buffer_missing_duration_|. (This method helps stream parser
73    // emit all buffers in a media segment before signaling end of segment.)
74    void ApplyDurationEstimateIfNeeded();
75
76    // Clears |ready_buffers_| (use ExtractReadyBuffers() to fill it again).
77    // Leaves as-is |buffers_| and any possibly held-aside buffer that is
78    // missing duration.
79    void ClearReadyBuffers();
80
81    // Clears all buffer state, including any possibly held-aside buffer that
82    // was missing duration, and all contents of |buffers_| and
83    // |ready_buffers_|.
84    void Reset();
85
86    // Helper function used to inspect block data to determine if the
87    // block is a keyframe.
88    // |data| contains the bytes in the block.
89    // |size| indicates the number of bytes in |data|.
90    bool IsKeyframe(const uint8* data, int size) const;
91
    // Returns |default_duration_|.
92    base::TimeDelta default_duration() const { return default_duration_; }
93
94   private:
95    // Helper that sanity-checks |buffer| duration, updates
96    // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|.
97    // Returns false if |buffer| failed sanity check and therefore was not added
98    // to |buffers_|. Returns true otherwise.
99    bool QueueBuffer(const scoped_refptr<StreamParserBuffer>& buffer);
100
101    // Helper that calculates the buffer duration to use in
102    // ApplyDurationEstimateIfNeeded().
103    base::TimeDelta GetDurationEstimate();
104
105    int track_num_;
106    bool is_video_;
107
108    // Parsed track buffers, each with duration and in (decode) timestamp order,
109    // that have not yet been extracted into |ready_buffers_|. Note that up to
110    // one additional buffer missing duration may be tracked by
111    // |last_added_buffer_missing_duration_|.
112    BufferQueue buffers_;
113    scoped_refptr<StreamParserBuffer> last_added_buffer_missing_duration_;
114
115    // Buffers in (decode) timestamp order that were previously parsed into and
116    // extracted from |buffers_|. Buffers are moved from |buffers_| to
117    // |ready_buffers_| by ExtractReadyBuffers() if they are below a specified
118    // upper bound timestamp. Track users can therefore extract only those
119    // parsed buffers which are "ready" for emission (all before some maximum
120    // timestamp).
121    BufferQueue ready_buffers_;
122
123    // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used.
124    base::TimeDelta default_duration_;
125
126    // If kNoTimestamp(), then a default value will be used. This estimate is
127    // the maximum duration seen or derived so far for this track, and is valid
128    // only if |default_duration_| is kNoTimestamp().
129    base::TimeDelta estimated_next_frame_duration_;
130
131    LogCB log_cb_;
132  };
133
  // Per-track state for text tracks, keyed by an int (presumably the track
  // number accepted by FindTextTrack() -- confirm).
134  typedef std::map<int, Track> TextTrackMap;
135
136 public:
  // NOTE(review): defined out of line. Judging by the parameter names, this
  // takes the audio/video track numbers and default durations, the text
  // tracks, the set of track numbers to ignore, per-stream encryption key
  // ids and a log callback -- confirm against the implementation.
137  WebMClusterParser(int64 timecode_scale,
138                    int audio_track_num,
139                    base::TimeDelta audio_default_duration,
140                    int video_track_num,
141                    base::TimeDelta video_default_duration,
142                    const WebMTracksParser::TextTracks& text_tracks,
143                    const std::set<int64>& ignored_tracks,
144                    const std::string& audio_encryption_key_id,
145                    const std::string& video_encryption_key_id,
146                    const LogCB& log_cb);
147  virtual ~WebMClusterParser();
148
149  // Resets the parser state so it can accept a new cluster.
150  void Reset();
151
152  // Parses a WebM cluster element in |buf|.
153  //
154  // Returns -1 if the parse fails.
155  // Returns 0 if more data is needed.
156  // Returns the number of bytes parsed on success.
157  int Parse(const uint8* buf, int size);
158
  // Returns |cluster_start_time_|.
159  base::TimeDelta cluster_start_time() const { return cluster_start_time_; }
160
161  // Get the current ready buffers resulting from Parse().
162  // If the parse reached the end of cluster and the last buffer was held aside
163  // due to missing duration, the buffer is given an estimated duration and
164  // included in the result.
165  // Otherwise, if there is a buffer held aside due to missing duration for
166  // any of the tracks, no buffers with same or greater (decode) timestamp will
167  // be included in the buffers.
168  // The returned deques are cleared by Parse() or Reset() and updated by the
169  // next calls to Get{Audio,Video}Buffers().
170  // If no Parse() or Reset() has occurred since the last call to Get{Audio,
171  // Video,Text}Buffers(), then the previous BufferQueue& is returned again
172  // without any recalculation.
173  const BufferQueue& GetAudioBuffers();
174  const BufferQueue& GetVideoBuffers();
175
176  // Constructs and returns a subset of |text_track_map_| containing only
177  // tracks with non-empty buffer queues produced by the last Parse() and
178  // filtered to exclude any buffers that have (decode) timestamp same or
179  // greater than the lowest (decode) timestamp across all tracks of any buffer
180  // held aside due to missing duration (unless the end of cluster has been
181  // reached).
182  // The returned map is cleared by Parse() or Reset() and updated by the next
183  // call to GetTextBuffers().
184  // If no Parse() or Reset() has occurred since the last call to
185  // GetTextBuffers(), then the previous TextBufferQueueMap& is returned again
186  // without any recalculation.
187  const TextBufferQueueMap& GetTextBuffers();
188
189  // Returns true if the last Parse() call stopped at the end of a cluster.
190  bool cluster_ended() const { return cluster_ended_; }
191
192 private:
193  // WebMParserClient methods.
194  virtual WebMParserClient* OnListStart(int id) OVERRIDE;
195  virtual bool OnListEnd(int id) OVERRIDE;
196  virtual bool OnUInt(int id, int64 val) OVERRIDE;
197  virtual bool OnBinary(int id, const uint8* data, int size) OVERRIDE;
198
  // Block-handling helpers, defined out of line.
  // NOTE(review): presumably reached from the WebMParserClient callbacks
  // above, with a false result signalling a parse error -- confirm.
199  bool ParseBlock(bool is_simple_block, const uint8* buf, int size,
200                  const uint8* additional, int additional_size, int duration,
201                  int64 discard_padding);
202  bool OnBlock(bool is_simple_block, int track_num, int timecode, int duration,
203               int flags, const uint8* data, int size,
204               const uint8* additional, int additional_size,
205               int64 discard_padding);
206
207  // Resets the Track objects associated with each text track.
208  void ResetTextTracks();
209
210  // Clears the ready buffers associated with each text track.
211  void ClearTextTrackReadyBuffers();
212
213  // Helper method for Get{Audio,Video,Text}Buffers() that recomputes
214  // |ready_buffer_upper_bound_| and calls ExtractReadyBuffers() on each track.
215  // If |cluster_ended_| is true, first applies duration estimate if needed for
216  // |audio_| and |video_| and sets |ready_buffer_upper_bound_| to
217  // kInfiniteDuration(). Otherwise, sets |ready_buffer_upper_bound_| to the
218  // minimum upper bound across |audio_| and |video_|. (Text tracks can have no
219  // buffers missing duration, so they are not involved in calculating the upper
220  // bound.)
221  // Parse() or Reset() must be called between calls to UpdateReadyBuffers() to
222  // clear each track's ready buffers and to reset |ready_buffer_upper_bound_|
223  // to kNoDecodeTimestamp().
224  void UpdateReadyBuffers();
225
226  // Search for the indicated track_num among the text tracks.  Returns NULL
227  // if that track num is not a text track.
228  Track* FindTextTrack(int track_num);
229
230  double timecode_multiplier_;  // Multiplier used to convert timecodes into
231                                // microseconds.
232  std::set<int64> ignored_tracks_;
233  std::string audio_encryption_key_id_;
234  std::string video_encryption_key_id_;
235
236  WebMListParser parser_;
237
  // NOTE(review): the fields below look like state gathered for the block
  // currently being parsed -- confirm against the implementation.
238  int64 last_block_timecode_;
239  scoped_ptr<uint8[]> block_data_;
240  int block_data_size_;
241  int64 block_duration_;
242  int64 block_add_id_;
243  scoped_ptr<uint8[]> block_additional_data_;
244  int block_additional_data_size_;
245  int64 discard_padding_;
246  bool discard_padding_set_;
247
248  int64 cluster_timecode_;
  // Exposed through cluster_start_time().
249  base::TimeDelta cluster_start_time_;
  // Exposed through cluster_ended().
250  bool cluster_ended_;
251
  // Per-track state (see Track) for the audio track, the video track, and
  // each text track respectively.
252  Track audio_;
253  Track video_;
254  TextTrackMap text_track_map_;
255
256  // Subset of |text_track_map_| maintained by GetTextBuffers(), and cleared by
257  // ClearTextTrackReadyBuffers(). Callers of GetTextBuffers() get a const-ref
258  // to this member.
259  TextBufferQueueMap text_buffers_map_;
260
261  // Limits the range of buffers returned by Get{Audio,Video,Text}Buffers() to
262  // this exclusive upper bound. Set to kNoDecodeTimestamp(), meaning not yet
263  // calculated, by Reset() and Parse(). If kNoDecodeTimestamp(), then
264  // Get{Audio,Video,Text}Buffers() will calculate it to be the minimum (decode)
265  // timestamp across all tracks' |last_added_buffer_missing_duration_|, or
266  // kInfiniteDuration() if no buffers are currently missing duration.
267  DecodeTimestamp ready_buffer_upper_bound_;
268
269  LogCB log_cb_;
270
271  DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser);
272};
273
274}  // namespace media
275
276#endif  // MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
277