1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
#include "media/base/audio_splicer.h"

#include <algorithm>
#include <cstdlib>
#include <deque>

#include "base/logging.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/vector_math.h"
16
17namespace media {
18
// Smallest gap or overlap, measured in frames, that the sanitizer acts upon.
// Anything smaller is passed through untouched so that timestamp rounding in
// the source content does not cause samples to be repeatedly inserted and
// then dropped again.
const int kMinGapSize = 2;
24
25// AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so
26// manually adjust the duration and timestamp after trimming.
27static void AccurateTrimStart(int frames_to_trim,
28                              const scoped_refptr<AudioBuffer> buffer,
29                              const AudioTimestampHelper& timestamp_helper) {
30  buffer->TrimStart(frames_to_trim);
31  buffer->set_timestamp(timestamp_helper.GetTimestamp());
32}
33
34// Returns an AudioBus whose frame buffer is backed by the provided AudioBuffer.
35static scoped_ptr<AudioBus> CreateAudioBufferWrapper(
36    const scoped_refptr<AudioBuffer>& buffer) {
37  scoped_ptr<AudioBus> wrapper =
38      AudioBus::CreateWrapper(buffer->channel_count());
39  wrapper->set_frames(buffer->frame_count());
40  for (int ch = 0; ch < buffer->channel_count(); ++ch) {
41    wrapper->SetChannelData(
42        ch, reinterpret_cast<float*>(buffer->channel_data()[ch]));
43  }
44  return wrapper.Pass();
45}
46
47class AudioStreamSanitizer {
48 public:
49  explicit AudioStreamSanitizer(int samples_per_second);
50  ~AudioStreamSanitizer();
51
52  // Resets the sanitizer state by clearing the output buffers queue, and
53  // resetting the timestamp helper.
54  void Reset();
55
56  // Similar to Reset(), but initializes the timestamp helper with the given
57  // parameters.
58  void ResetTimestampState(int64 frame_count, base::TimeDelta base_timestamp);
59
60  // Adds a new buffer full of samples or end of stream buffer to the splicer.
61  // Returns true if the buffer was accepted. False is returned if an error
62  // occurred.
63  bool AddInput(const scoped_refptr<AudioBuffer>& input);
64
65  // Returns true if the sanitizer has a buffer to return.
66  bool HasNextBuffer() const;
67
68  // Removes the next buffer from the output buffer queue and returns it; should
69  // only be called if HasNextBuffer() returns true.
70  scoped_refptr<AudioBuffer> GetNextBuffer();
71
72  // Returns the total frame count of all buffers available for output.
73  int GetFrameCount() const;
74
75  const AudioTimestampHelper& timestamp_helper() {
76    return output_timestamp_helper_;
77  }
78
79  // Transfer all buffers into |output|.  Returns false if AddInput() on the
80  // |output| sanitizer fails for any buffer removed from |this|.
81  bool DrainInto(AudioStreamSanitizer* output);
82
83 private:
84  void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer);
85
86  AudioTimestampHelper output_timestamp_helper_;
87  bool received_end_of_stream_;
88
89  typedef std::deque<scoped_refptr<AudioBuffer> > BufferQueue;
90  BufferQueue output_buffers_;
91
92  DISALLOW_ASSIGN(AudioStreamSanitizer);
93};
94
95AudioStreamSanitizer::AudioStreamSanitizer(int samples_per_second)
96    : output_timestamp_helper_(samples_per_second),
97      received_end_of_stream_(false) {}
98
99AudioStreamSanitizer::~AudioStreamSanitizer() {}
100
101void AudioStreamSanitizer::Reset() {
102  ResetTimestampState(0, kNoTimestamp());
103}
104
105void AudioStreamSanitizer::ResetTimestampState(int64 frame_count,
106                                               base::TimeDelta base_timestamp) {
107  output_buffers_.clear();
108  received_end_of_stream_ = false;
109  output_timestamp_helper_.SetBaseTimestamp(base_timestamp);
110  if (frame_count > 0)
111    output_timestamp_helper_.AddFrames(frame_count);
112}
113
114bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) {
115  DCHECK(!received_end_of_stream_ || input->end_of_stream());
116
117  if (input->end_of_stream()) {
118    output_buffers_.push_back(input);
119    received_end_of_stream_ = true;
120    return true;
121  }
122
123  DCHECK(input->timestamp() != kNoTimestamp());
124  DCHECK(input->duration() > base::TimeDelta());
125  DCHECK_GT(input->frame_count(), 0);
126
127  if (output_timestamp_helper_.base_timestamp() == kNoTimestamp())
128    output_timestamp_helper_.SetBaseTimestamp(input->timestamp());
129
130  if (output_timestamp_helper_.base_timestamp() > input->timestamp()) {
131    DVLOG(1) << "Input timestamp is before the base timestamp.";
132    return false;
133  }
134
135  const base::TimeDelta timestamp = input->timestamp();
136  const base::TimeDelta expected_timestamp =
137      output_timestamp_helper_.GetTimestamp();
138  const base::TimeDelta delta = timestamp - expected_timestamp;
139
140  if (std::abs(delta.InMilliseconds()) >
141      AudioSplicer::kMaxTimeDeltaInMilliseconds) {
142    DVLOG(1) << "Timestamp delta too large: " << delta.InMicroseconds() << "us";
143    return false;
144  }
145
146  int frames_to_fill = 0;
147  if (delta != base::TimeDelta())
148    frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp);
149
150  if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) {
151    AddOutputBuffer(input);
152    return true;
153  }
154
155  if (frames_to_fill > 0) {
156    DVLOG(1) << "Gap detected @ " << expected_timestamp.InMicroseconds()
157             << " us: " << delta.InMicroseconds() << " us";
158
159    // Create a buffer with enough silence samples to fill the gap and
160    // add it to the output buffer.
161    scoped_refptr<AudioBuffer> gap =
162        AudioBuffer::CreateEmptyBuffer(input->channel_layout(),
163                                       input->channel_count(),
164                                       input->sample_rate(),
165                                       frames_to_fill,
166                                       expected_timestamp);
167    AddOutputBuffer(gap);
168
169    // Add the input buffer now that the gap has been filled.
170    AddOutputBuffer(input);
171    return true;
172  }
173
174  // Overlapping buffers marked as splice frames are handled by AudioSplicer,
175  // but decoder and demuxer quirks may sometimes produce overlapping samples
176  // which need to be sanitized.
177  //
178  // A crossfade can't be done here because only the current buffer is available
179  // at this point, not previous buffers.
180  DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds()
181           << " us: " << -delta.InMicroseconds() << " us";
182
183  const int frames_to_skip = -frames_to_fill;
184  if (input->frame_count() <= frames_to_skip) {
185    DVLOG(1) << "Dropping whole buffer";
186    return true;
187  }
188
189  // Copy the trailing samples that do not overlap samples already output
190  // into a new buffer.  Add this new buffer to the output queue.
191  //
192  // TODO(acolwell): Implement a cross-fade here so the transition is less
193  // jarring.
194  AccurateTrimStart(frames_to_skip, input, output_timestamp_helper_);
195  AddOutputBuffer(input);
196  return true;
197}
198
199bool AudioStreamSanitizer::HasNextBuffer() const {
200  return !output_buffers_.empty();
201}
202
203scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() {
204  scoped_refptr<AudioBuffer> ret = output_buffers_.front();
205  output_buffers_.pop_front();
206  return ret;
207}
208
209void AudioStreamSanitizer::AddOutputBuffer(
210    const scoped_refptr<AudioBuffer>& buffer) {
211  output_timestamp_helper_.AddFrames(buffer->frame_count());
212  output_buffers_.push_back(buffer);
213}
214
215int AudioStreamSanitizer::GetFrameCount() const {
216  int frame_count = 0;
217  for (const auto& buffer : output_buffers_)
218    frame_count += buffer->frame_count();
219  return frame_count;
220}
221
222bool AudioStreamSanitizer::DrainInto(AudioStreamSanitizer* output) {
223  while (HasNextBuffer()) {
224    if (!output->AddInput(GetNextBuffer()))
225      return false;
226  }
227  return true;
228}
229
230AudioSplicer::AudioSplicer(int samples_per_second)
231    : max_crossfade_duration_(
232          base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds)),
233      splice_timestamp_(kNoTimestamp()),
234      max_splice_end_timestamp_(kNoTimestamp()),
235      output_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
236      pre_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
237      post_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
238      have_all_pre_splice_buffers_(false) {}
239
240AudioSplicer::~AudioSplicer() {}
241
242void AudioSplicer::Reset() {
243  output_sanitizer_->Reset();
244  pre_splice_sanitizer_->Reset();
245  post_splice_sanitizer_->Reset();
246  have_all_pre_splice_buffers_ = false;
247  reset_splice_timestamps();
248}
249
250bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {
251  // If we're not processing a splice, add the input to the output queue.
252  if (splice_timestamp_ == kNoTimestamp()) {
253    DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
254    DCHECK(!post_splice_sanitizer_->HasNextBuffer());
255    return output_sanitizer_->AddInput(input);
256  }
257
258  const AudioTimestampHelper& output_ts_helper =
259      output_sanitizer_->timestamp_helper();
260
261  if (!have_all_pre_splice_buffers_) {
262    DCHECK(!input->end_of_stream());
263
264    // If the provided buffer is entirely before the splice point it can also be
265    // added to the output queue.
266    if (input->timestamp() + input->duration() < splice_timestamp_) {
267      DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
268      return output_sanitizer_->AddInput(input);
269    }
270
271    // If we've encountered the first pre splice buffer, reset the pre splice
272    // sanitizer based on |output_sanitizer_|.  This is done so that gaps and
273    // overlaps between buffers across the sanitizers are accounted for prior
274    // to calculating crossfade.
275    if (!pre_splice_sanitizer_->HasNextBuffer()) {
276      pre_splice_sanitizer_->ResetTimestampState(
277          output_ts_helper.frame_count(), output_ts_helper.base_timestamp());
278    }
279
280    return pre_splice_sanitizer_->AddInput(input);
281  }
282
283  // The first post splice buffer is expected to match |splice_timestamp_|.
284  if (!post_splice_sanitizer_->HasNextBuffer())
285    CHECK(splice_timestamp_ == input->timestamp());
286
287  // At this point we have all the fade out preroll buffers from the decoder.
288  // We now need to wait until we have enough data to perform the crossfade (or
289  // we receive an end of stream).
290  if (!post_splice_sanitizer_->AddInput(input))
291    return false;
292
293  // Ensure |output_sanitizer_| has a valid base timestamp so we can use it for
294  // timestamp calculations.
295  if (output_ts_helper.base_timestamp() == kNoTimestamp()) {
296    output_sanitizer_->ResetTimestampState(
297        0, pre_splice_sanitizer_->timestamp_helper().base_timestamp());
298  }
299
300  // If a splice frame was incorrectly marked due to poor demuxed timestamps, we
301  // may not actually have a splice.  Here we check if any frames exist before
302  // the splice.  In this case, just transfer all data to the output sanitizer.
303  const int frames_before_splice =
304      output_ts_helper.GetFramesToTarget(splice_timestamp_);
305  if (frames_before_splice < 0 ||
306      pre_splice_sanitizer_->GetFrameCount() <= frames_before_splice) {
307    CHECK(pre_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
308
309    // If the file contains incorrectly muxed timestamps, there may be huge gaps
310    // between the demuxed and decoded timestamps.
311    if (!post_splice_sanitizer_->DrainInto(output_sanitizer_.get()))
312      return false;
313
314    reset_splice_timestamps();
315    return true;
316  }
317
318  // Wait until we have enough data to crossfade or end of stream.
319  if (!input->end_of_stream() &&
320      input->timestamp() + input->duration() < max_splice_end_timestamp_) {
321    return true;
322  }
323
324  scoped_refptr<AudioBuffer> crossfade_buffer;
325  scoped_ptr<AudioBus> pre_splice =
326      ExtractCrossfadeFromPreSplice(&crossfade_buffer);
327
328  // Crossfade the pre splice and post splice sections and transfer all relevant
329  // buffers into |output_sanitizer_|.
330  CrossfadePostSplice(pre_splice.Pass(), crossfade_buffer);
331
332  // Clear the splice timestamp so new splices can be accepted.
333  reset_splice_timestamps();
334  return true;
335}
336
337bool AudioSplicer::HasNextBuffer() const {
338  return output_sanitizer_->HasNextBuffer();
339}
340
341scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() {
342  return output_sanitizer_->GetNextBuffer();
343}
344
345void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) {
346  if (splice_timestamp == kNoTimestamp()) {
347    DCHECK(splice_timestamp_ != kNoTimestamp());
348    DCHECK(!have_all_pre_splice_buffers_);
349    have_all_pre_splice_buffers_ = true;
350    return;
351  }
352
353  if (splice_timestamp_ == splice_timestamp)
354    return;
355
356  // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to
357  // handle cases where another splice comes in before we've received 5ms of
358  // data from the last one.  Leave this as a CHECK for now to figure out if
359  // this case is possible.
360  CHECK(splice_timestamp_ == kNoTimestamp());
361  splice_timestamp_ = splice_timestamp;
362  max_splice_end_timestamp_ = splice_timestamp_ + max_crossfade_duration_;
363  pre_splice_sanitizer_->Reset();
364  post_splice_sanitizer_->Reset();
365  have_all_pre_splice_buffers_ = false;
366}
367
368scoped_ptr<AudioBus> AudioSplicer::ExtractCrossfadeFromPreSplice(
369    scoped_refptr<AudioBuffer>* crossfade_buffer) {
370  DCHECK(crossfade_buffer);
371  const AudioTimestampHelper& output_ts_helper =
372      output_sanitizer_->timestamp_helper();
373
374  int frames_before_splice =
375      output_ts_helper.GetFramesToTarget(splice_timestamp_);
376
377  // Determine crossfade frame count based on available frames in each splicer
378  // and capping to the maximum crossfade duration.
379  const int max_crossfade_frame_count =
380      output_ts_helper.GetFramesToTarget(max_splice_end_timestamp_) -
381      frames_before_splice;
382  const int frames_to_crossfade = std::min(
383      max_crossfade_frame_count,
384      std::min(pre_splice_sanitizer_->GetFrameCount() - frames_before_splice,
385               post_splice_sanitizer_->GetFrameCount()));
386  // There must always be frames to crossfade, otherwise the splice should not
387  // have been generated.
388  DCHECK_GT(frames_to_crossfade, 0);
389
390  int frames_read = 0;
391  scoped_ptr<AudioBus> output_bus;
392  while (pre_splice_sanitizer_->HasNextBuffer() &&
393         frames_read < frames_to_crossfade) {
394    scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer();
395
396    // We don't know the channel count until we see the first buffer, so wait
397    // until the first buffer to allocate the output AudioBus.
398    if (!output_bus) {
399      output_bus =
400          AudioBus::Create(preroll->channel_count(), frames_to_crossfade);
401      // Allocate output buffer for crossfade.
402      *crossfade_buffer = AudioBuffer::CreateBuffer(kSampleFormatPlanarF32,
403                                                    preroll->channel_layout(),
404                                                    preroll->channel_count(),
405                                                    preroll->sample_rate(),
406                                                    frames_to_crossfade);
407    }
408
409    // There may be enough of a gap introduced during decoding such that an
410    // entire buffer exists before the splice point.
411    if (frames_before_splice >= preroll->frame_count()) {
412      // Adjust the number of frames remaining before the splice.  NOTE: This is
413      // safe since |pre_splice_sanitizer_| is a continuation of the timeline in
414      // |output_sanitizer_|.  As such we're guaranteed there are no gaps or
415      // overlaps in the timeline between the two sanitizers.
416      frames_before_splice -= preroll->frame_count();
417      CHECK(output_sanitizer_->AddInput(preroll));
418      continue;
419    }
420
421    const int frames_to_read =
422        std::min(preroll->frame_count() - frames_before_splice,
423                 output_bus->frames() - frames_read);
424    preroll->ReadFrames(
425        frames_to_read, frames_before_splice, frames_read, output_bus.get());
426    frames_read += frames_to_read;
427
428    // If only part of the buffer was consumed, trim it appropriately and stick
429    // it into the output queue.
430    if (frames_before_splice) {
431      preroll->TrimEnd(preroll->frame_count() - frames_before_splice);
432      CHECK(output_sanitizer_->AddInput(preroll));
433      frames_before_splice = 0;
434    }
435  }
436
437  // Ensure outputs were properly allocated.  The method should not have been
438  // called if there is not enough data to crossfade.
439  // TODO(dalecurtis): Convert to DCHECK() once http://crbug.com/356073 fixed.
440  CHECK(output_bus);
441  CHECK(crossfade_buffer->get());
442
443  // All necessary buffers have been processed, it's safe to reset.
444  pre_splice_sanitizer_->Reset();
445  DCHECK_EQ(output_bus->frames(), frames_read);
446  DCHECK_EQ(output_ts_helper.GetFramesToTarget(splice_timestamp_), 0);
447  return output_bus.Pass();
448}
449
450void AudioSplicer::CrossfadePostSplice(
451    scoped_ptr<AudioBus> pre_splice_bus,
452    const scoped_refptr<AudioBuffer>& crossfade_buffer) {
453  // Use the calculated timestamp and duration to ensure there's no extra gaps
454  // or overlaps to process when adding the buffer to |output_sanitizer_|.
455  const AudioTimestampHelper& output_ts_helper =
456      output_sanitizer_->timestamp_helper();
457  crossfade_buffer->set_timestamp(output_ts_helper.GetTimestamp());
458
459  // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap
460  // our AudioBuffer in one so we can avoid extra data copies.
461  scoped_ptr<AudioBus> output_bus = CreateAudioBufferWrapper(crossfade_buffer);
462
463  // Extract crossfade section from the |post_splice_sanitizer_|.
464  int frames_read = 0, frames_to_trim = 0;
465  scoped_refptr<AudioBuffer> remainder;
466  while (post_splice_sanitizer_->HasNextBuffer() &&
467         frames_read < output_bus->frames()) {
468    scoped_refptr<AudioBuffer> postroll =
469        post_splice_sanitizer_->GetNextBuffer();
470    const int frames_to_read =
471        std::min(postroll->frame_count(), output_bus->frames() - frames_read);
472    postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus.get());
473    frames_read += frames_to_read;
474
475    // If only part of the buffer was consumed, save it for after we've added
476    // the crossfade buffer
477    if (frames_to_read < postroll->frame_count()) {
478      DCHECK(!remainder.get());
479      remainder.swap(postroll);
480      frames_to_trim = frames_to_read;
481    }
482  }
483
484  DCHECK_EQ(output_bus->frames(), frames_read);
485
486  // Crossfade the audio into |crossfade_buffer|.
487  for (int ch = 0; ch < output_bus->channels(); ++ch) {
488    vector_math::Crossfade(pre_splice_bus->channel(ch),
489                           pre_splice_bus->frames(),
490                           output_bus->channel(ch));
491  }
492
493  CHECK(output_sanitizer_->AddInput(crossfade_buffer));
494  DCHECK_EQ(crossfade_buffer->frame_count(), output_bus->frames());
495
496  if (remainder.get()) {
497    // Trim off consumed frames.
498    AccurateTrimStart(frames_to_trim, remainder, output_ts_helper);
499    CHECK(output_sanitizer_->AddInput(remainder));
500  }
501
502  // Transfer all remaining buffers out and reset once empty.
503  CHECK(post_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
504  post_splice_sanitizer_->Reset();
505}
506
507}  // namespace media
508