// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/filters/ffmpeg_audio_decoder.h"

#include "base/callback_helpers.h"
#include "base/single_thread_task_runner.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/audio_discard_helper.h"
#include "media/base/bind_to_current_loop.h"
#include "media/base/decoder_buffer.h"
#include "media/base/limits.h"
#include "media/base/sample_format.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/filters/ffmpeg_glue.h"

namespace media {

22// Returns true if the decode result was end of stream.
23static inline bool IsEndOfStream(int result,
24                                 int decoded_size,
25                                 const scoped_refptr<DecoderBuffer>& input) {
26  // Three conditions to meet to declare end of stream for this decoder:
27  // 1. FFmpeg didn't read anything.
28  // 2. FFmpeg didn't output anything.
29  // 3. An end of stream buffer is received.
30  return result == 0 && decoded_size == 0 && input->end_of_stream();
31}
32
// Return the number of channels from the data in |frame|.
static inline int DetermineChannels(AVFrame* frame) {
#if defined(CHROMIUM_NO_AVFRAME_CHANNELS)
  // When use_system_ffmpeg==1, libav's AVFrame doesn't have channels field,
  // so derive the channel count from the channel layout instead.
  return av_get_channel_layout_nb_channels(frame->channel_layout);
#else
  return frame->channels;
#endif
}
42
// Called by FFmpeg's allocation routine to free a buffer. |opaque| is the
// AudioBuffer allocated, so unref it.
static void ReleaseAudioBufferImpl(void* opaque, uint8* data) {
  // |opaque| holds the raw AudioBuffer pointer stashed by GetAudioBuffer().
  // swap() adopts that pointer into |buffer| without adding a reference, so
  // when |buffer| goes out of scope the reference taken at allocation time is
  // released. |data| is unused: the AudioBuffer owns the actual memory.
  scoped_refptr<AudioBuffer> buffer;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
}
49
// Called by FFmpeg's allocation routine to allocate a buffer. Uses
// AVCodecContext.opaque to get the object reference in order to call
// GetAudioBuffer() to do the actual allocation.
static int GetAudioBuffer(struct AVCodecContext* s, AVFrame* frame, int flags) {
  // This decoder only installs itself as a get_buffer2 callback for direct
  // rendering (CODEC_CAP_DR1) audio codecs.
  DCHECK(s->codec->capabilities & CODEC_CAP_DR1);
  DCHECK_EQ(s->codec_type, AVMEDIA_TYPE_AUDIO);

  // Since this routine is called by FFmpeg when a buffer is required for audio
  // data, use the values supplied by FFmpeg (ignoring the current settings).
  // FFmpegDecode() gets to determine if the buffer is useable or not.
  AVSampleFormat format = static_cast<AVSampleFormat>(frame->format);
  SampleFormat sample_format = AVSampleFormatToSampleFormat(format);
  int channels = DetermineChannels(frame);
  // Reject non-positive or over-limit channel counts. NOTE(review): the
  // "exceeds limit" message is also logged for channels <= 0.
  if (channels <= 0 || channels >= limits::kMaxChannels) {
    DLOG(ERROR) << "Requested number of channels (" << channels
                << ") exceeds limit.";
    return AVERROR(EINVAL);
  }

  int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
  if (frame->nb_samples <= 0)
    return AVERROR(EINVAL);

  if (s->channels != channels) {
    DLOG(ERROR) << "AVCodecContext and AVFrame disagree on channel count.";
    return AVERROR(EINVAL);
  }

  // Determine how big the buffer should be and allocate it. FFmpeg may adjust
  // how big each channel data is in order to meet the alignment policy, so
  // we need to take this into consideration.
  int buffer_size_in_bytes =
      av_samples_get_buffer_size(&frame->linesize[0],
                                 channels,
                                 frame->nb_samples,
                                 format,
                                 AudioBuffer::kChannelAlignment);
  // Check for errors from av_samples_get_buffer_size().
  if (buffer_size_in_bytes < 0)
    return buffer_size_in_bytes;
  // Alignment padding may make the buffer hold more frames than requested;
  // FFmpegDecode() trims the excess after decoding.
  int frames_required = buffer_size_in_bytes / bytes_per_channel / channels;
  DCHECK_GE(frames_required, frame->nb_samples);
  scoped_refptr<AudioBuffer> buffer = AudioBuffer::CreateBuffer(
      sample_format,
      ChannelLayoutToChromeChannelLayout(s->channel_layout, s->channels),
      channels,
      s->sample_rate,
      frames_required);

  // Initialize the data[] and extended_data[] fields to point into the memory
  // allocated for AudioBuffer. |number_of_planes| will be 1 for interleaved
  // audio and equal to |channels| for planar audio.
  int number_of_planes = buffer->channel_data().size();
  if (number_of_planes <= AV_NUM_DATA_POINTERS) {
    DCHECK_EQ(frame->extended_data, frame->data);
    for (int i = 0; i < number_of_planes; ++i)
      frame->data[i] = buffer->channel_data()[i];
  } else {
    // There are more channels than can fit into data[], so allocate
    // extended_data[] and fill appropriately.
    frame->extended_data = static_cast<uint8**>(
        av_malloc(number_of_planes * sizeof(*frame->extended_data)));
    int i = 0;
    for (; i < AV_NUM_DATA_POINTERS; ++i)
      frame->extended_data[i] = frame->data[i] = buffer->channel_data()[i];
    for (; i < number_of_planes; ++i)
      frame->extended_data[i] = buffer->channel_data()[i];
  }

  // Now create an AVBufferRef for the data just allocated. It will own the
  // reference to the AudioBuffer object.
  // swap() transfers the reference out of |buffer| into |opaque| without
  // changing the refcount; ReleaseAudioBufferImpl() releases it when FFmpeg
  // frees the AVBufferRef.
  void* opaque = NULL;
  buffer.swap(reinterpret_cast<AudioBuffer**>(&opaque));
  frame->buf[0] = av_buffer_create(
      frame->data[0], buffer_size_in_bytes, ReleaseAudioBufferImpl, opaque, 0);
  return 0;
}
127
// Constructs an uninitialized decoder. All subsequent calls must occur on
// |task_runner|; |log_cb| is used to surface media log messages.
FFmpegAudioDecoder::FFmpegAudioDecoder(
    const scoped_refptr<base::SingleThreadTaskRunner>& task_runner,
    const LogCB& log_cb)
    : task_runner_(task_runner),
      state_(kUninitialized),
      av_sample_format_(0),
      log_cb_(log_cb) {
}
136
137FFmpegAudioDecoder::~FFmpegAudioDecoder() {
138  DCHECK(task_runner_->BelongsToCurrentThread());
139
140  if (state_ != kUninitialized) {
141    ReleaseFFmpegResources();
142    ResetTimestampState();
143  }
144}
145
146std::string FFmpegAudioDecoder::GetDisplayName() const {
147  return "FFmpegAudioDecoder";
148}
149
150void FFmpegAudioDecoder::Initialize(const AudioDecoderConfig& config,
151                                    const PipelineStatusCB& status_cb,
152                                    const OutputCB& output_cb) {
153  DCHECK(task_runner_->BelongsToCurrentThread());
154  DCHECK(!config.is_encrypted());
155
156  FFmpegGlue::InitializeFFmpeg();
157
158  config_ = config;
159  PipelineStatusCB initialize_cb = BindToCurrentLoop(status_cb);
160
161  if (!config.IsValidConfig() || !ConfigureDecoder()) {
162    initialize_cb.Run(DECODER_ERROR_NOT_SUPPORTED);
163    return;
164  }
165
166  // Success!
167  output_cb_ = BindToCurrentLoop(output_cb);
168  state_ = kNormal;
169  initialize_cb.Run(PIPELINE_OK);
170}
171
172void FFmpegAudioDecoder::Decode(const scoped_refptr<DecoderBuffer>& buffer,
173                                const DecodeCB& decode_cb) {
174  DCHECK(task_runner_->BelongsToCurrentThread());
175  DCHECK(!decode_cb.is_null());
176  CHECK_NE(state_, kUninitialized);
177  DecodeCB decode_cb_bound = BindToCurrentLoop(decode_cb);
178
179  if (state_ == kError) {
180    decode_cb_bound.Run(kDecodeError);
181    return;
182  }
183
184  // Do nothing if decoding has finished.
185  if (state_ == kDecodeFinished) {
186    decode_cb_bound.Run(kOk);
187    return;
188  }
189
190  DecodeBuffer(buffer, decode_cb_bound);
191}
192
// Flushes the decoder and returns it to the normal decoding state. |closure|
// is posted (not run inline) so callers always observe asynchronous
// completion.
void FFmpegAudioDecoder::Reset(const base::Closure& closure) {
  DCHECK(task_runner_->BelongsToCurrentThread());

  // Discard any frames buffered inside FFmpeg before resuming decoding.
  avcodec_flush_buffers(codec_context_.get());
  state_ = kNormal;
  ResetTimestampState();
  task_runner_->PostTask(FROM_HERE, closure);
}
201
// Runs |buffer| through FFmpegDecode(), draining the decoder on end of
// stream, then reports the result via |decode_cb|.
void FFmpegAudioDecoder::DecodeBuffer(
    const scoped_refptr<DecoderBuffer>& buffer,
    const DecodeCB& decode_cb) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  DCHECK_NE(state_, kUninitialized);
  DCHECK_NE(state_, kDecodeFinished);
  DCHECK_NE(state_, kError);
  DCHECK(buffer.get());

  // Make sure we are notified if http://crbug.com/49709 returns.  Issue also
  // occurs with some damaged files.
  if (!buffer->end_of_stream() && buffer->timestamp() == kNoTimestamp()) {
    DVLOG(1) << "Received a buffer without timestamps!";
    decode_cb.Run(kDecodeError);
    return;
  }

  // For an EOS buffer, keep feeding the (empty) packet until the decoder
  // stops producing frames so its internal buffers are fully drained.
  bool has_produced_frame;
  do {
    has_produced_frame = false;
    if (!FFmpegDecode(buffer, &has_produced_frame)) {
      state_ = kError;
      decode_cb.Run(kDecodeError);
      return;
    }
    // Repeat to flush the decoder after receiving EOS buffer.
  } while (buffer->end_of_stream() && has_produced_frame);

  if (buffer->end_of_stream())
    state_ = kDecodeFinished;

  decode_cb.Run(kOk);
}
235
236bool FFmpegAudioDecoder::FFmpegDecode(
237    const scoped_refptr<DecoderBuffer>& buffer,
238    bool* has_produced_frame) {
239  DCHECK(!*has_produced_frame);
240
241  AVPacket packet;
242  av_init_packet(&packet);
243  if (buffer->end_of_stream()) {
244    packet.data = NULL;
245    packet.size = 0;
246  } else {
247    packet.data = const_cast<uint8*>(buffer->data());
248    packet.size = buffer->data_size();
249  }
250
251  // Each audio packet may contain several frames, so we must call the decoder
252  // until we've exhausted the packet.  Regardless of the packet size we always
253  // want to hand it to the decoder at least once, otherwise we would end up
254  // skipping end of stream packets since they have a size of zero.
255  do {
256    int frame_decoded = 0;
257    const int result = avcodec_decode_audio4(
258        codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);
259
260    if (result < 0) {
261      DCHECK(!buffer->end_of_stream())
262          << "End of stream buffer produced an error! "
263          << "This is quite possibly a bug in the audio decoder not handling "
264          << "end of stream AVPackets correctly.";
265
266      MEDIA_LOG(log_cb_)
267          << "Dropping audio frame which failed decode with timestamp: "
268          << buffer->timestamp().InMicroseconds() << " us, duration: "
269          << buffer->duration().InMicroseconds() << " us, packet size: "
270          << buffer->data_size() << " bytes";
271
272      break;
273    }
274
275    // Update packet size and data pointer in case we need to call the decoder
276    // with the remaining bytes from this packet.
277    packet.size -= result;
278    packet.data += result;
279
280    scoped_refptr<AudioBuffer> output;
281    const int channels = DetermineChannels(av_frame_.get());
282    if (frame_decoded) {
283      if (av_frame_->sample_rate != config_.samples_per_second() ||
284          channels != ChannelLayoutToChannelCount(config_.channel_layout()) ||
285          av_frame_->format != av_sample_format_) {
286        DLOG(ERROR) << "Unsupported midstream configuration change!"
287                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
288                    << config_.samples_per_second()
289                    << ", Channels: " << channels << " vs "
290                    << ChannelLayoutToChannelCount(config_.channel_layout())
291                    << ", Sample Format: " << av_frame_->format << " vs "
292                    << av_sample_format_;
293
294        if (config_.codec() == kCodecAAC &&
295            av_frame_->sample_rate == 2 * config_.samples_per_second()) {
296          MEDIA_LOG(log_cb_) << "Implicit HE-AAC signalling is being used."
297                             << " Please use mp4a.40.5 instead of mp4a.40.2 in"
298                             << " the mimetype.";
299        }
300        // This is an unrecoverable error, so bail out.
301        av_frame_unref(av_frame_.get());
302        return false;
303      }
304
305      // Get the AudioBuffer that the data was decoded into. Adjust the number
306      // of frames, in case fewer than requested were actually decoded.
307      output = reinterpret_cast<AudioBuffer*>(
308          av_buffer_get_opaque(av_frame_->buf[0]));
309
310      DCHECK_EQ(ChannelLayoutToChannelCount(config_.channel_layout()),
311                output->channel_count());
312      const int unread_frames = output->frame_count() - av_frame_->nb_samples;
313      DCHECK_GE(unread_frames, 0);
314      if (unread_frames > 0)
315        output->TrimEnd(unread_frames);
316      av_frame_unref(av_frame_.get());
317    }
318
319    // WARNING: |av_frame_| no longer has valid data at this point.
320    const int decoded_frames = frame_decoded ? output->frame_count() : 0;
321    if (IsEndOfStream(result, decoded_frames, buffer)) {
322      DCHECK_EQ(packet.size, 0);
323    } else if (discard_helper_->ProcessBuffers(buffer, output)) {
324      *has_produced_frame = true;
325      output_cb_.Run(output);
326    }
327  } while (packet.size > 0);
328
329  return true;
330}
331
// Frees the FFmpeg codec context and frame. Safe to call repeatedly;
// resetting an already-null scoped pointer is a no-op.
void FFmpegAudioDecoder::ReleaseFFmpegResources() {
  codec_context_.reset();
  av_frame_.reset();
}
336
337bool FFmpegAudioDecoder::ConfigureDecoder() {
338  if (!config_.IsValidConfig()) {
339    DLOG(ERROR) << "Invalid audio stream -"
340                << " codec: " << config_.codec()
341                << " channel layout: " << config_.channel_layout()
342                << " bits per channel: " << config_.bits_per_channel()
343                << " samples per second: " << config_.samples_per_second();
344    return false;
345  }
346
347  if (config_.is_encrypted()) {
348    DLOG(ERROR) << "Encrypted audio stream not supported";
349    return false;
350  }
351
352  // Release existing decoder resources if necessary.
353  ReleaseFFmpegResources();
354
355  // Initialize AVCodecContext structure.
356  codec_context_.reset(avcodec_alloc_context3(NULL));
357  AudioDecoderConfigToAVCodecContext(config_, codec_context_.get());
358
359  codec_context_->opaque = this;
360  codec_context_->get_buffer2 = GetAudioBuffer;
361  codec_context_->refcounted_frames = 1;
362
363  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
364  if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
365    DLOG(ERROR) << "Could not initialize audio decoder: "
366                << codec_context_->codec_id;
367    ReleaseFFmpegResources();
368    state_ = kUninitialized;
369    return false;
370  }
371
372  // Success!
373  av_frame_.reset(av_frame_alloc());
374  discard_helper_.reset(new AudioDiscardHelper(config_.samples_per_second(),
375                                               config_.codec_delay()));
376  av_sample_format_ = codec_context_->sample_fmt;
377
378  if (codec_context_->channels !=
379      ChannelLayoutToChannelCount(config_.channel_layout())) {
380    DLOG(ERROR) << "Audio configuration specified "
381                << ChannelLayoutToChannelCount(config_.channel_layout())
382                << " channels, but FFmpeg thinks the file contains "
383                << codec_context_->channels << " channels";
384    ReleaseFFmpegResources();
385    state_ = kUninitialized;
386    return false;
387  }
388
389  ResetTimestampState();
390  return true;
391}
392
// Resets the discard helper so timestamp/codec-delay tracking starts fresh
// (used after Reset() and when (re)configuring the decoder).
void FFmpegAudioDecoder::ResetTimestampState() {
  discard_helper_->Reset(config_.codec_delay());
}

}  // namespace media