track_run_iterator.cc revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "media/formats/mp4/track_run_iterator.h"
6
7#include <algorithm>
8
9#include "media/base/buffers.h"
10#include "media/base/stream_parser_buffer.h"
11#include "media/formats/mp4/rcheck.h"
12
13namespace {
14static const uint32 kSampleIsDifferenceSampleFlagMask = 0x10000;
15}
16
17namespace media {
18namespace mp4 {
19
// Per-sample values resolved by PopulateSampleInfo() for one sample of a
// track run.
struct SampleInfo {
  // Sample size; AdvanceSample() adds it to the running sample data offset.
  int size;
  // Sample duration in track timescale units; AdvanceSample() adds it to the
  // running decode timestamp.
  int duration;
  // Added to the decode timestamp by cts(). Already includes the edit list
  // offset applied by PopulateSampleInfo().
  int cts_offset;
  bool is_keyframe;
};
26
// Describes one track run as assembled by TrackRunIterator::Init(): the
// resolved per-sample information, where the sample data starts, which sample
// description applies, and where the run's auxiliary information is located.
struct TrackRunInfo {
  uint32 track_id;
  std::vector<SampleInfo> samples;  // One entry per sample in the run.
  int64 timescale;  // Denominator used when converting times to TimeDelta.
  int64 start_dts;  // Decode timestamp of the first sample, timescale units.
  int64 sample_start_offset;  // Data offset of the run's first sample.

  bool is_audio;
  // Init() assigns |audio_description| when |is_audio| is true and
  // |video_description| otherwise; it does not assign the other pointer.
  const AudioSampleEntry* audio_description;
  const VideoSampleEntry* video_description;

  int64 aux_info_start_offset;  // Only valid if aux_info_total_size > 0.
  int aux_info_default_size;
  std::vector<uint8> aux_info_sizes;  // Populated if default_size == 0.
  int aux_info_total_size;  // 0 when the run has no auxiliary information.

  TrackRunInfo();
  ~TrackRunInfo();
};
46
47TrackRunInfo::TrackRunInfo()
48    : track_id(0),
49      timescale(-1),
50      start_dts(-1),
51      sample_start_offset(-1),
52      is_audio(false),
53      aux_info_start_offset(-1),
54      aux_info_default_size(-1),
55      aux_info_total_size(-1) {
56}
57TrackRunInfo::~TrackRunInfo() {}
58
59TimeDelta TimeDeltaFromRational(int64 numer, int64 denom) {
60  // To avoid overflow, split the following calculation:
61  // (numer * base::Time::kMicrosecondsPerSecond) / denom
62  // into:
63  //  (numer / denom) * base::Time::kMicrosecondsPerSecond +
64  // ((numer % denom) * base::Time::kMicrosecondsPerSecond) / denom
65  int64 a = numer / denom;
66  DCHECK_LE((a > 0 ? a : -a), kint64max / base::Time::kMicrosecondsPerSecond);
67  int64 timea_in_us = a * base::Time::kMicrosecondsPerSecond;
68
69  int64 b = numer % denom;
70  DCHECK_LE((b > 0 ? b : -b), kint64max / base::Time::kMicrosecondsPerSecond);
71  int64 timeb_in_us = (b * base::Time::kMicrosecondsPerSecond) / denom;
72
73  DCHECK((timeb_in_us < 0) || (timea_in_us <= kint64max - timeb_in_us));
74  DCHECK((timeb_in_us > 0) || (timea_in_us >= kint64min - timeb_in_us));
75  return TimeDelta::FromMicroseconds(timea_in_us + timeb_in_us);
76}
77
78TrackRunIterator::TrackRunIterator(const Movie* moov,
79                                   const LogCB& log_cb)
80    : moov_(moov), log_cb_(log_cb), sample_offset_(0) {
81  CHECK(moov);
82}
83
84TrackRunIterator::~TrackRunIterator() {}
85
86static void PopulateSampleInfo(const TrackExtends& trex,
87                               const TrackFragmentHeader& tfhd,
88                               const TrackFragmentRun& trun,
89                               const int64 edit_list_offset,
90                               const uint32 i,
91                               SampleInfo* sample_info,
92                               const SampleDependsOn sample_depends_on) {
93  if (i < trun.sample_sizes.size()) {
94    sample_info->size = trun.sample_sizes[i];
95  } else if (tfhd.default_sample_size > 0) {
96    sample_info->size = tfhd.default_sample_size;
97  } else {
98    sample_info->size = trex.default_sample_size;
99  }
100
101  if (i < trun.sample_durations.size()) {
102    sample_info->duration = trun.sample_durations[i];
103  } else if (tfhd.default_sample_duration > 0) {
104    sample_info->duration = tfhd.default_sample_duration;
105  } else {
106    sample_info->duration = trex.default_sample_duration;
107  }
108
109  if (i < trun.sample_composition_time_offsets.size()) {
110    sample_info->cts_offset = trun.sample_composition_time_offsets[i];
111  } else {
112    sample_info->cts_offset = 0;
113  }
114  sample_info->cts_offset += edit_list_offset;
115
116  uint32 flags;
117  if (i < trun.sample_flags.size()) {
118    flags = trun.sample_flags[i];
119  } else if (tfhd.has_default_sample_flags) {
120    flags = tfhd.default_sample_flags;
121  } else {
122    flags = trex.default_sample_flags;
123  }
124
125  switch (sample_depends_on) {
126    case kSampleDependsOnUnknown:
127      sample_info->is_keyframe = !(flags & kSampleIsDifferenceSampleFlagMask);
128      break;
129
130    case kSampleDependsOnOthers:
131      sample_info->is_keyframe = false;
132      break;
133
134    case kSampleDependsOnNoOther:
135      sample_info->is_keyframe = true;
136      break;
137
138    case kSampleDependsOnReserved:
139      CHECK(false);
140  }
141}
142
143// In well-structured encrypted media, each track run will be immediately
144// preceded by its auxiliary information; this is the only optimal storage
145// pattern in terms of minimum number of bytes from a serial stream needed to
146// begin playback. It also allows us to optimize caching on memory-constrained
147// architectures, because we can cache the relatively small auxiliary
148// information for an entire run and then discard data from the input stream,
149// instead of retaining the entire 'mdat' box.
150//
151// We optimize for this situation (with no loss of generality) by sorting track
152// runs during iteration in order of their first data offset (either sample data
153// or auxiliary data).
154class CompareMinTrackRunDataOffset {
155 public:
156  bool operator()(const TrackRunInfo& a, const TrackRunInfo& b) {
157    int64 a_aux = a.aux_info_total_size ? a.aux_info_start_offset : kint64max;
158    int64 b_aux = b.aux_info_total_size ? b.aux_info_start_offset : kint64max;
159
160    int64 a_lesser = std::min(a_aux, a.sample_start_offset);
161    int64 a_greater = std::max(a_aux, a.sample_start_offset);
162    int64 b_lesser = std::min(b_aux, b.sample_start_offset);
163    int64 b_greater = std::max(b_aux, b.sample_start_offset);
164
165    if (a_lesser == b_lesser) return a_greater < b_greater;
166    return a_lesser < b_lesser;
167  }
168};
169
170bool TrackRunIterator::Init(const MovieFragment& moof) {
171  runs_.clear();
172
173  for (size_t i = 0; i < moof.tracks.size(); i++) {
174    const TrackFragment& traf = moof.tracks[i];
175
176    const Track* trak = NULL;
177    for (size_t t = 0; t < moov_->tracks.size(); t++) {
178      if (moov_->tracks[t].header.track_id == traf.header.track_id)
179        trak = &moov_->tracks[t];
180    }
181    RCHECK(trak);
182
183    const TrackExtends* trex = NULL;
184    for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {
185      if (moov_->extends.tracks[t].track_id == traf.header.track_id)
186        trex = &moov_->extends.tracks[t];
187    }
188    RCHECK(trex);
189
190    const SampleDescription& stsd =
191        trak->media.information.sample_table.description;
192    if (stsd.type != kAudio && stsd.type != kVideo) {
193      DVLOG(1) << "Skipping unhandled track type";
194      continue;
195    }
196    size_t desc_idx = traf.header.sample_description_index;
197    if (!desc_idx) desc_idx = trex->default_sample_description_index;
198    RCHECK(desc_idx > 0);  // Descriptions are one-indexed in the file
199    desc_idx -= 1;
200
201    // Process edit list to remove CTS offset introduced in the presence of
202    // B-frames (those that contain a single edit with a nonnegative media
203    // time). Other uses of edit lists are not supported, as they are
204    // both uncommon and better served by higher-level protocols.
205    int64 edit_list_offset = 0;
206    const std::vector<EditListEntry>& edits = trak->edit.list.edits;
207    if (!edits.empty()) {
208      if (edits.size() > 1)
209        DVLOG(1) << "Multi-entry edit box detected; some components ignored.";
210
211      if (edits[0].media_time < 0) {
212        DVLOG(1) << "Empty edit list entry ignored.";
213      } else {
214        edit_list_offset = -edits[0].media_time;
215      }
216    }
217
218    int64 run_start_dts = traf.decode_time.decode_time;
219    int sample_count_sum = 0;
220    bool is_sync_sample_box_present =
221        trak->media.information.sample_table.sync_sample.is_present;
222    for (size_t j = 0; j < traf.runs.size(); j++) {
223      const TrackFragmentRun& trun = traf.runs[j];
224      TrackRunInfo tri;
225      tri.track_id = traf.header.track_id;
226      tri.timescale = trak->media.header.timescale;
227      tri.start_dts = run_start_dts;
228      tri.sample_start_offset = trun.data_offset;
229
230      tri.is_audio = (stsd.type == kAudio);
231      if (tri.is_audio) {
232        RCHECK(!stsd.audio_entries.empty());
233        if (desc_idx > stsd.audio_entries.size())
234          desc_idx = 0;
235        tri.audio_description = &stsd.audio_entries[desc_idx];
236      } else {
237        RCHECK(!stsd.video_entries.empty());
238        if (desc_idx > stsd.video_entries.size())
239          desc_idx = 0;
240        tri.video_description = &stsd.video_entries[desc_idx];
241      }
242
243      // Collect information from the auxiliary_offset entry with the same index
244      // in the 'saiz' container as the current run's index in the 'trun'
245      // container, if it is present.
246      if (traf.auxiliary_offset.offsets.size() > j) {
247        // There should be an auxiliary info entry corresponding to each sample
248        // in the auxiliary offset entry's corresponding track run.
249        RCHECK(traf.auxiliary_size.sample_count >=
250               sample_count_sum + trun.sample_count);
251        tri.aux_info_start_offset = traf.auxiliary_offset.offsets[j];
252        tri.aux_info_default_size =
253            traf.auxiliary_size.default_sample_info_size;
254        if (tri.aux_info_default_size == 0) {
255          const std::vector<uint8>& sizes =
256              traf.auxiliary_size.sample_info_sizes;
257          tri.aux_info_sizes.insert(tri.aux_info_sizes.begin(),
258              sizes.begin() + sample_count_sum,
259              sizes.begin() + sample_count_sum + trun.sample_count);
260        }
261
262        // If the default info size is positive, find the total size of the aux
263        // info block from it, otherwise sum over the individual sizes of each
264        // aux info entry in the aux_offset entry.
265        if (tri.aux_info_default_size) {
266          tri.aux_info_total_size =
267              tri.aux_info_default_size * trun.sample_count;
268        } else {
269          tri.aux_info_total_size = 0;
270          for (size_t k = 0; k < trun.sample_count; k++) {
271            tri.aux_info_total_size += tri.aux_info_sizes[k];
272          }
273        }
274      } else {
275        tri.aux_info_start_offset = -1;
276        tri.aux_info_total_size = 0;
277      }
278
279      tri.samples.resize(trun.sample_count);
280      for (size_t k = 0; k < trun.sample_count; k++) {
281        PopulateSampleInfo(*trex, traf.header, trun, edit_list_offset,
282                           k, &tri.samples[k], traf.sdtp.sample_depends_on(k));
283        run_start_dts += tri.samples[k].duration;
284
285        // ISO-14496-12 Section 8.20.1 : If the sync sample box is not present,
286        // every sample is a random access point.
287        //
288        // NOTE: MPEG's "is random access point" concept is equivalent to this
289        // and downstream code's "is keyframe" concept.
290        if (!is_sync_sample_box_present)
291          tri.samples[k].is_keyframe = true;
292      }
293      runs_.push_back(tri);
294      sample_count_sum += trun.sample_count;
295    }
296  }
297
298  std::sort(runs_.begin(), runs_.end(), CompareMinTrackRunDataOffset());
299  run_itr_ = runs_.begin();
300  ResetRun();
301  return true;
302}
303
304void TrackRunIterator::AdvanceRun() {
305  ++run_itr_;
306  ResetRun();
307}
308
309void TrackRunIterator::ResetRun() {
310  if (!IsRunValid()) return;
311  sample_dts_ = run_itr_->start_dts;
312  sample_offset_ = run_itr_->sample_start_offset;
313  sample_itr_ = run_itr_->samples.begin();
314  cenc_info_.clear();
315}
316
317void TrackRunIterator::AdvanceSample() {
318  DCHECK(IsSampleValid());
319  sample_dts_ += sample_itr_->duration;
320  sample_offset_ += sample_itr_->size;
321  ++sample_itr_;
322}
323
324// This implementation only indicates a need for caching if CENC auxiliary
325// info is available in the stream.
326bool TrackRunIterator::AuxInfoNeedsToBeCached() {
327  DCHECK(IsRunValid());
328  return is_encrypted() && aux_info_size() > 0 && cenc_info_.size() == 0;
329}
330
331// This implementation currently only caches CENC auxiliary info.
332bool TrackRunIterator::CacheAuxInfo(const uint8* buf, int buf_size) {
333  RCHECK(AuxInfoNeedsToBeCached() && buf_size >= aux_info_size());
334
335  cenc_info_.resize(run_itr_->samples.size());
336  int64 pos = 0;
337  for (size_t i = 0; i < run_itr_->samples.size(); i++) {
338    int info_size = run_itr_->aux_info_default_size;
339    if (!info_size)
340      info_size = run_itr_->aux_info_sizes[i];
341
342    BufferReader reader(buf + pos, info_size);
343    RCHECK(cenc_info_[i].Parse(track_encryption().default_iv_size, &reader));
344    pos += info_size;
345  }
346
347  return true;
348}
349
350bool TrackRunIterator::IsRunValid() const {
351  return run_itr_ != runs_.end();
352}
353
354bool TrackRunIterator::IsSampleValid() const {
355  return IsRunValid() && (sample_itr_ != run_itr_->samples.end());
356}
357
358// Because tracks are in sorted order and auxiliary information is cached when
359// returning samples, it is guaranteed that no data will be required before the
360// lesser of the minimum data offset of this track and the next in sequence.
361// (The stronger condition - that no data is required before the minimum data
362// offset of this track alone - is not guaranteed, because the BMFF spec does
363// not have any inter-run ordering restrictions.)
364int64 TrackRunIterator::GetMaxClearOffset() {
365  int64 offset = kint64max;
366
367  if (IsSampleValid()) {
368    offset = std::min(offset, sample_offset_);
369    if (AuxInfoNeedsToBeCached())
370      offset = std::min(offset, aux_info_offset());
371  }
372  if (run_itr_ != runs_.end()) {
373    std::vector<TrackRunInfo>::const_iterator next_run = run_itr_ + 1;
374    if (next_run != runs_.end()) {
375      offset = std::min(offset, next_run->sample_start_offset);
376      if (next_run->aux_info_total_size)
377        offset = std::min(offset, next_run->aux_info_start_offset);
378    }
379  }
380  if (offset == kint64max) return 0;
381  return offset;
382}
383
384uint32 TrackRunIterator::track_id() const {
385  DCHECK(IsRunValid());
386  return run_itr_->track_id;
387}
388
389bool TrackRunIterator::is_encrypted() const {
390  DCHECK(IsRunValid());
391  return track_encryption().is_encrypted;
392}
393
394int64 TrackRunIterator::aux_info_offset() const {
395  return run_itr_->aux_info_start_offset;
396}
397
398int TrackRunIterator::aux_info_size() const {
399  return run_itr_->aux_info_total_size;
400}
401
402bool TrackRunIterator::is_audio() const {
403  DCHECK(IsRunValid());
404  return run_itr_->is_audio;
405}
406
407const AudioSampleEntry& TrackRunIterator::audio_description() const {
408  DCHECK(is_audio());
409  DCHECK(run_itr_->audio_description);
410  return *run_itr_->audio_description;
411}
412
413const VideoSampleEntry& TrackRunIterator::video_description() const {
414  DCHECK(!is_audio());
415  DCHECK(run_itr_->video_description);
416  return *run_itr_->video_description;
417}
418
419int64 TrackRunIterator::sample_offset() const {
420  DCHECK(IsSampleValid());
421  return sample_offset_;
422}
423
424int TrackRunIterator::sample_size() const {
425  DCHECK(IsSampleValid());
426  return sample_itr_->size;
427}
428
429TimeDelta TrackRunIterator::dts() const {
430  DCHECK(IsSampleValid());
431  return TimeDeltaFromRational(sample_dts_, run_itr_->timescale);
432}
433
434TimeDelta TrackRunIterator::cts() const {
435  DCHECK(IsSampleValid());
436  return TimeDeltaFromRational(sample_dts_ + sample_itr_->cts_offset,
437                               run_itr_->timescale);
438}
439
440TimeDelta TrackRunIterator::duration() const {
441  DCHECK(IsSampleValid());
442  return TimeDeltaFromRational(sample_itr_->duration, run_itr_->timescale);
443}
444
445bool TrackRunIterator::is_keyframe() const {
446  DCHECK(IsSampleValid());
447  return sample_itr_->is_keyframe;
448}
449
450const TrackEncryption& TrackRunIterator::track_encryption() const {
451  if (is_audio())
452    return audio_description().sinf.info.track_encryption;
453  return video_description().sinf.info.track_encryption;
454}
455
456scoped_ptr<DecryptConfig> TrackRunIterator::GetDecryptConfig() {
457  size_t sample_idx = sample_itr_ - run_itr_->samples.begin();
458  DCHECK(sample_idx < cenc_info_.size());
459  const FrameCENCInfo& cenc_info = cenc_info_[sample_idx];
460  DCHECK(is_encrypted() && !AuxInfoNeedsToBeCached());
461
462  size_t total_size = 0;
463  if (!cenc_info.subsamples.empty() &&
464      (!cenc_info.GetTotalSizeOfSubsamples(&total_size) ||
465       total_size != static_cast<size_t>(sample_size()))) {
466    MEDIA_LOG(log_cb_) << "Incorrect CENC subsample size.";
467    return scoped_ptr<DecryptConfig>();
468  }
469
470  const std::vector<uint8>& kid = track_encryption().default_kid;
471  return scoped_ptr<DecryptConfig>(new DecryptConfig(
472      std::string(reinterpret_cast<const char*>(&kid[0]), kid.size()),
473      std::string(reinterpret_cast<const char*>(cenc_info.iv),
474                  arraysize(cenc_info.iv)),
475      cenc_info.subsamples));
476}
477
478}  // namespace mp4
479}  // namespace media
480