1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "media/cdm/ppapi/external_clear_key/ffmpeg_cdm_audio_decoder.h"
6
7#include <algorithm>
8
9#include "base/logging.h"
10#include "media/base/audio_bus.h"
11#include "media/base/audio_timestamp_helper.h"
12#include "media/base/buffers.h"
13#include "media/base/data_buffer.h"
14#include "media/base/limits.h"
15#include "media/ffmpeg/ffmpeg_common.h"
16
17// Include FFmpeg header files.
18extern "C" {
19// Temporarily disable possible loss of data warning.
20MSVC_PUSH_DISABLE_WARNING(4244);
21#include <libavcodec/avcodec.h>
22MSVC_POP_WARNING();
23}  // extern "C"
24
25namespace media {
26
// Maximum number of channels with defined layout in src/media.
// IsValidConfig() rejects any configuration whose channel_count exceeds this.
static const int kMaxChannels = 8;
29
30static AVCodecID CdmAudioCodecToCodecID(
31    cdm::AudioDecoderConfig::AudioCodec audio_codec) {
32  switch (audio_codec) {
33    case cdm::AudioDecoderConfig::kCodecVorbis:
34      return AV_CODEC_ID_VORBIS;
35    case cdm::AudioDecoderConfig::kCodecAac:
36      return AV_CODEC_ID_AAC;
37    case cdm::AudioDecoderConfig::kUnknownAudioCodec:
38    default:
39      NOTREACHED() << "Unsupported cdm::AudioCodec: " << audio_codec;
40      return AV_CODEC_ID_NONE;
41  }
42}
43
44static void CdmAudioDecoderConfigToAVCodecContext(
45    const cdm::AudioDecoderConfig& config,
46    AVCodecContext* codec_context) {
47  codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
48  codec_context->codec_id = CdmAudioCodecToCodecID(config.codec);
49
50  switch (config.bits_per_channel) {
51    case 8:
52      codec_context->sample_fmt = AV_SAMPLE_FMT_U8;
53      break;
54    case 16:
55      codec_context->sample_fmt = AV_SAMPLE_FMT_S16;
56      break;
57    case 32:
58      codec_context->sample_fmt = AV_SAMPLE_FMT_S32;
59      break;
60    default:
61      DVLOG(1) << "CdmAudioDecoderConfigToAVCodecContext() Unsupported bits "
62                  "per channel: " << config.bits_per_channel;
63      codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
64  }
65
66  codec_context->channels = config.channel_count;
67  codec_context->sample_rate = config.samples_per_second;
68
69  if (config.extra_data) {
70    codec_context->extradata_size = config.extra_data_size;
71    codec_context->extradata = reinterpret_cast<uint8_t*>(
72        av_malloc(config.extra_data_size + FF_INPUT_BUFFER_PADDING_SIZE));
73    memcpy(codec_context->extradata, config.extra_data,
74           config.extra_data_size);
75    memset(codec_context->extradata + config.extra_data_size, '\0',
76           FF_INPUT_BUFFER_PADDING_SIZE);
77  } else {
78    codec_context->extradata = NULL;
79    codec_context->extradata_size = 0;
80  }
81}
82
83static cdm::AudioFormat AVSampleFormatToCdmAudioFormat(
84    AVSampleFormat sample_format) {
85  switch (sample_format) {
86    case AV_SAMPLE_FMT_U8:
87      return cdm::kAudioFormatU8;
88    case AV_SAMPLE_FMT_S16:
89      return cdm::kAudioFormatS16;
90    case AV_SAMPLE_FMT_S32:
91      return cdm::kAudioFormatS32;
92    case AV_SAMPLE_FMT_FLT:
93      return cdm::kAudioFormatF32;
94    case AV_SAMPLE_FMT_S16P:
95      return cdm::kAudioFormatPlanarS16;
96    case AV_SAMPLE_FMT_FLTP:
97      return cdm::kAudioFormatPlanarF32;
98    default:
99      DVLOG(1) << "Unknown AVSampleFormat: " << sample_format;
100  }
101  return cdm::kUnknownAudioFormat;
102}
103
104static void CopySamples(cdm::AudioFormat cdm_format,
105                        int decoded_audio_size,
106                        const AVFrame& av_frame,
107                        uint8_t* output_buffer) {
108  switch (cdm_format) {
109    case cdm::kAudioFormatU8:
110    case cdm::kAudioFormatS16:
111    case cdm::kAudioFormatS32:
112    case cdm::kAudioFormatF32:
113      memcpy(output_buffer, av_frame.data[0], decoded_audio_size);
114      break;
115    case cdm::kAudioFormatPlanarS16:
116    case cdm::kAudioFormatPlanarF32: {
117      const int decoded_size_per_channel =
118          decoded_audio_size / av_frame.channels;
119      for (int i = 0; i < av_frame.channels; ++i) {
120        memcpy(output_buffer,
121               av_frame.extended_data[i],
122               decoded_size_per_channel);
123        output_buffer += decoded_size_per_channel;
124      }
125      break;
126    }
127    default:
128      NOTREACHED() << "Unsupported CDM Audio Format!";
129      memset(output_buffer, 0, decoded_audio_size);
130  }
131}
132
// Constructs an uninitialized decoder that allocates its output buffers
// through |host|.  No FFmpeg objects are created here; that happens in
// Initialize().  Timestamp tracking starts out as kNoTimestamp() with no
// output bytes pending to be dropped.
FFmpegCdmAudioDecoder::FFmpegCdmAudioDecoder(ClearKeyCdmHost* host)
    : is_initialized_(false),
      host_(host),
      samples_per_second_(0),
      channels_(0),
      av_sample_format_(0),
      bytes_per_frame_(0),
      last_input_timestamp_(kNoTimestamp()),
      output_bytes_to_drop_(0) {
}
143
// Drops the codec context and the decode frame via ReleaseFFmpegResources().
FFmpegCdmAudioDecoder::~FFmpegCdmAudioDecoder() {
  ReleaseFFmpegResources();
}
147
148bool FFmpegCdmAudioDecoder::Initialize(const cdm::AudioDecoderConfig& config) {
149  DVLOG(1) << "Initialize()";
150  if (!IsValidConfig(config)) {
151    LOG(ERROR) << "Initialize(): invalid audio decoder configuration.";
152    return false;
153  }
154
155  if (is_initialized_) {
156    LOG(ERROR) << "Initialize(): Already initialized.";
157    return false;
158  }
159
160  // Initialize AVCodecContext structure.
161  codec_context_.reset(avcodec_alloc_context3(NULL));
162  CdmAudioDecoderConfigToAVCodecContext(config, codec_context_.get());
163
164  // MP3 decodes to S16P which we don't support, tell it to use S16 instead.
165  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P)
166    codec_context_->request_sample_fmt = AV_SAMPLE_FMT_S16;
167
168  AVCodec* codec = avcodec_find_decoder(codec_context_->codec_id);
169  if (!codec || avcodec_open2(codec_context_.get(), codec, NULL) < 0) {
170    DLOG(ERROR) << "Could not initialize audio decoder: "
171                << codec_context_->codec_id;
172    return false;
173  }
174
175  // Ensure avcodec_open2() respected our format request.
176  if (codec_context_->sample_fmt == AV_SAMPLE_FMT_S16P) {
177    DLOG(ERROR) << "Unable to configure a supported sample format: "
178                << codec_context_->sample_fmt;
179    return false;
180  }
181
182  // Success!
183  av_frame_.reset(av_frame_alloc());
184  samples_per_second_ = config.samples_per_second;
185  bytes_per_frame_ = codec_context_->channels * config.bits_per_channel / 8;
186  output_timestamp_helper_.reset(
187      new AudioTimestampHelper(config.samples_per_second));
188  is_initialized_ = true;
189
190  // Store initial values to guard against midstream configuration changes.
191  channels_ = codec_context_->channels;
192  av_sample_format_ = codec_context_->sample_fmt;
193
194  return true;
195}
196
197void FFmpegCdmAudioDecoder::Deinitialize() {
198  DVLOG(1) << "Deinitialize()";
199  ReleaseFFmpegResources();
200  is_initialized_ = false;
201  ResetTimestampState();
202}
203
204void FFmpegCdmAudioDecoder::Reset() {
205  DVLOG(1) << "Reset()";
206  avcodec_flush_buffers(codec_context_.get());
207  ResetTimestampState();
208}
209
210// static
211bool FFmpegCdmAudioDecoder::IsValidConfig(
212    const cdm::AudioDecoderConfig& config) {
213  return config.codec != cdm::AudioDecoderConfig::kUnknownAudioCodec &&
214         config.channel_count > 0 &&
215         config.channel_count <= kMaxChannels &&
216         config.bits_per_channel > 0 &&
217         config.bits_per_channel <= limits::kMaxBitsPerSample &&
218         config.samples_per_second > 0 &&
219         config.samples_per_second <= limits::kMaxSampleRate;
220}
221
222cdm::Status FFmpegCdmAudioDecoder::DecodeBuffer(
223    const uint8_t* compressed_buffer,
224    int32_t compressed_buffer_size,
225    int64_t input_timestamp,
226    cdm::AudioFrames* decoded_frames) {
227  DVLOG(1) << "DecodeBuffer()";
228  const bool is_end_of_stream = !compressed_buffer;
229  base::TimeDelta timestamp =
230      base::TimeDelta::FromMicroseconds(input_timestamp);
231
232  bool is_vorbis = codec_context_->codec_id == AV_CODEC_ID_VORBIS;
233  if (!is_end_of_stream) {
234    if (last_input_timestamp_ == kNoTimestamp()) {
235      if (is_vorbis && timestamp < base::TimeDelta()) {
236        // Dropping frames for negative timestamps as outlined in section A.2
237        // in the Vorbis spec. http://xiph.org/vorbis/doc/Vorbis_I_spec.html
238        int frames_to_drop = floor(
239            0.5 + -timestamp.InSecondsF() * samples_per_second_);
240        output_bytes_to_drop_ = bytes_per_frame_ * frames_to_drop;
241      } else {
242        last_input_timestamp_ = timestamp;
243      }
244    } else if (timestamp != kNoTimestamp()) {
245      if (timestamp < last_input_timestamp_) {
246        base::TimeDelta diff = timestamp - last_input_timestamp_;
247        DVLOG(1) << "Input timestamps are not monotonically increasing! "
248                 << " ts " << timestamp.InMicroseconds() << " us"
249                 << " diff " << diff.InMicroseconds() << " us";
250        return cdm::kDecodeError;
251      }
252
253      last_input_timestamp_ = timestamp;
254    }
255  }
256
257  AVPacket packet;
258  av_init_packet(&packet);
259  packet.data = const_cast<uint8_t*>(compressed_buffer);
260  packet.size = compressed_buffer_size;
261
262  // Tell the CDM what AudioFormat we're using.
263  const cdm::AudioFormat cdm_format = AVSampleFormatToCdmAudioFormat(
264      static_cast<AVSampleFormat>(av_sample_format_));
265  DCHECK_NE(cdm_format, cdm::kUnknownAudioFormat);
266  decoded_frames->SetFormat(cdm_format);
267
268  // Each audio packet may contain several frames, so we must call the decoder
269  // until we've exhausted the packet.  Regardless of the packet size we always
270  // want to hand it to the decoder at least once, otherwise we would end up
271  // skipping end of stream packets since they have a size of zero.
272  do {
273    // Reset frame to default values.
274    av_frame_unref(av_frame_.get());
275
276    int frame_decoded = 0;
277    int result = avcodec_decode_audio4(
278        codec_context_.get(), av_frame_.get(), &frame_decoded, &packet);
279
280    if (result < 0) {
281      DCHECK(!is_end_of_stream)
282          << "End of stream buffer produced an error! "
283          << "This is quite possibly a bug in the audio decoder not handling "
284          << "end of stream AVPackets correctly.";
285
286      DLOG(ERROR)
287          << "Error decoding an audio frame with timestamp: "
288          << timestamp.InMicroseconds() << " us, duration: "
289          << timestamp.InMicroseconds() << " us, packet size: "
290          << compressed_buffer_size << " bytes";
291
292      return cdm::kDecodeError;
293    }
294
295    // Update packet size and data pointer in case we need to call the decoder
296    // with the remaining bytes from this packet.
297    packet.size -= result;
298    packet.data += result;
299
300    if (output_timestamp_helper_->base_timestamp() == kNoTimestamp() &&
301        !is_end_of_stream) {
302      DCHECK(timestamp != kNoTimestamp());
303      if (output_bytes_to_drop_ > 0) {
304        // Currently Vorbis is the only codec that causes us to drop samples.
305        // If we have to drop samples it always means the timeline starts at 0.
306        DCHECK_EQ(codec_context_->codec_id, AV_CODEC_ID_VORBIS);
307        output_timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
308      } else {
309        output_timestamp_helper_->SetBaseTimestamp(timestamp);
310      }
311    }
312
313    int decoded_audio_size = 0;
314    if (frame_decoded) {
315      if (av_frame_->sample_rate != samples_per_second_ ||
316          av_frame_->channels != channels_ ||
317          av_frame_->format != av_sample_format_) {
318        DLOG(ERROR) << "Unsupported midstream configuration change!"
319                    << " Sample Rate: " << av_frame_->sample_rate << " vs "
320                    << samples_per_second_
321                    << ", Channels: " << av_frame_->channels << " vs "
322                    << channels_
323                    << ", Sample Format: " << av_frame_->format << " vs "
324                    << av_sample_format_;
325        return cdm::kDecodeError;
326      }
327
328      decoded_audio_size = av_samples_get_buffer_size(
329          NULL, codec_context_->channels, av_frame_->nb_samples,
330          codec_context_->sample_fmt, 1);
331    }
332
333    if (decoded_audio_size > 0 && output_bytes_to_drop_ > 0) {
334      DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
335          << "Decoder didn't output full frames";
336
337      int dropped_size = std::min(decoded_audio_size, output_bytes_to_drop_);
338      decoded_audio_size -= dropped_size;
339      output_bytes_to_drop_ -= dropped_size;
340    }
341
342    if (decoded_audio_size > 0) {
343      DCHECK_EQ(decoded_audio_size % bytes_per_frame_, 0)
344          << "Decoder didn't output full frames";
345
346      base::TimeDelta output_timestamp =
347          output_timestamp_helper_->GetTimestamp();
348      output_timestamp_helper_->AddFrames(decoded_audio_size /
349                                          bytes_per_frame_);
350
351      // If we've exhausted the packet in the first decode we can write directly
352      // into the frame buffer instead of a multistep serialization approach.
353      if (serialized_audio_frames_.empty() && !packet.size) {
354        const uint32_t buffer_size = decoded_audio_size + sizeof(int64) * 2;
355        decoded_frames->SetFrameBuffer(host_->Allocate(buffer_size));
356        if (!decoded_frames->FrameBuffer()) {
357          LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed.";
358          return cdm::kDecodeError;
359        }
360        decoded_frames->FrameBuffer()->SetSize(buffer_size);
361        uint8_t* output_buffer = decoded_frames->FrameBuffer()->Data();
362
363        const int64 timestamp = output_timestamp.InMicroseconds();
364        memcpy(output_buffer, &timestamp, sizeof(timestamp));
365        output_buffer += sizeof(timestamp);
366
367        const int64 output_size = decoded_audio_size;
368        memcpy(output_buffer, &output_size, sizeof(output_size));
369        output_buffer += sizeof(output_size);
370
371        // Copy the samples and return success.
372        CopySamples(
373            cdm_format, decoded_audio_size, *av_frame_, output_buffer);
374        return cdm::kSuccess;
375      }
376
377      // There are still more frames to decode, so we need to serialize them in
378      // a secondary buffer since we don't know their sizes ahead of time (which
379      // is required to allocate the FrameBuffer object).
380      SerializeInt64(output_timestamp.InMicroseconds());
381      SerializeInt64(decoded_audio_size);
382
383      const size_t previous_size = serialized_audio_frames_.size();
384      serialized_audio_frames_.resize(previous_size + decoded_audio_size);
385      uint8_t* output_buffer = &serialized_audio_frames_[0] + previous_size;
386      CopySamples(
387          cdm_format, decoded_audio_size, *av_frame_, output_buffer);
388    }
389  } while (packet.size > 0);
390
391  if (!serialized_audio_frames_.empty()) {
392    decoded_frames->SetFrameBuffer(
393        host_->Allocate(serialized_audio_frames_.size()));
394    if (!decoded_frames->FrameBuffer()) {
395      LOG(ERROR) << "DecodeBuffer() ClearKeyCdmHost::Allocate failed.";
396      return cdm::kDecodeError;
397    }
398    memcpy(decoded_frames->FrameBuffer()->Data(),
399           &serialized_audio_frames_[0],
400           serialized_audio_frames_.size());
401    decoded_frames->FrameBuffer()->SetSize(serialized_audio_frames_.size());
402    serialized_audio_frames_.clear();
403
404    return cdm::kSuccess;
405  }
406
407  return cdm::kNeedMoreData;
408}
409
410void FFmpegCdmAudioDecoder::ResetTimestampState() {
411  output_timestamp_helper_->SetBaseTimestamp(kNoTimestamp());
412  last_input_timestamp_ = kNoTimestamp();
413  output_bytes_to_drop_ = 0;
414}
415
// Resets the smart pointers that hold the FFmpeg codec context and decode
// frame.  Called from the destructor and from Deinitialize().
void FFmpegCdmAudioDecoder::ReleaseFFmpegResources() {
  DVLOG(1) << "ReleaseFFmpegResources()";

  codec_context_.reset();
  av_frame_.reset();
}
422
423void FFmpegCdmAudioDecoder::SerializeInt64(int64 value) {
424  const size_t previous_size = serialized_audio_frames_.size();
425  serialized_audio_frames_.resize(previous_size + sizeof(value));
426  memcpy(&serialized_audio_frames_[0] + previous_size, &value, sizeof(value));
427}
428
429}  // namespace media
430