// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/webrtc_audio_renderer.h"

#include <algorithm>

7#include "base/logging.h"
8#include "base/metrics/histogram.h"
9#include "base/strings/string_util.h"
10#include "content/renderer/media/audio_device_factory.h"
11#include "content/renderer/media/webrtc_audio_device_impl.h"
12#include "content/renderer/render_thread_impl.h"
13#include "media/audio/audio_output_device.h"
14#include "media/audio/audio_parameters.h"
15#include "media/audio/sample_rates.h"
16#include "media/base/audio_hardware_config.h"
17
18#if defined(OS_WIN)
19#include "base/win/windows_version.h"
20#include "media/audio/win/core_audio_util_win.h"
21#endif
22
23namespace content {
24
25namespace {
26
// Supported hardware sample rates for the output side.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X. The rates
// listed below add restrictions, and Initialize() will fail if the user
// selects any rate outside these ranges.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// TODO(leozwang): We want to use the native sampling rate on Android to
// achieve low latency; currently 16000 is used to work around an audio
// problem on some Android devices.
const int kValidOutputRates[] = {48000, 44100, 16000};
const int kDefaultOutputBufferSize = 2048;
#else
const int kValidOutputRates[] = {44100};
#endif

// TODO(xians): Merge the following code into WebRtcAudioCapturer, or remove.
enum AudioFramesPerBuffer {
  k160,
  k320,
  k440,
  k480,
  k640,
  k880,
  k960,
  k1440,
  k1920,
  kUnexpectedAudioBufferSize  // Must always be last!
};

// Helper method to convert integral values to their respective enum values
// above, or kUnexpectedAudioBufferSize if no match exists.
// We map 441 to k440 to avoid changing the histogram XML definitions.
// It is still possible to map the histogram result to the actual buffer size.
// See http://crbug.com/243450 for details.
AudioFramesPerBuffer AsAudioFramesPerBuffer(int frames_per_buffer) {
  switch (frames_per_buffer) {
    case 160: return k160;
    case 320: return k320;
    case 441: return k440;
    case 480: return k480;
    case 640: return k640;
    case 880: return k880;
    case 960: return k960;
    case 1440: return k1440;
    case 1920: return k1920;
  }
  return kUnexpectedAudioBufferSize;
}

void AddHistogramFramesPerBuffer(int param) {
  AudioFramesPerBuffer afpb = AsAudioFramesPerBuffer(param);
  if (afpb != kUnexpectedAudioBufferSize) {
    UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
                              afpb, kUnexpectedAudioBufferSize);
  } else {
    // Report unexpected buffer sizes using a unique histogram name.
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputFramesPerBufferUnexpected", param);
  }
}

}  // namespace

WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id)
    : state_(UNINITIALIZED),
      source_render_view_id_(source_render_view_id),
      source_(NULL),
      play_ref_count_(0),
      audio_delay_milliseconds_(0),
      fifo_delay_milliseconds_(0) {
}

WebRtcAudioRenderer::~WebRtcAudioRenderer() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_EQ(state_, UNINITIALIZED);
  buffer_.reset();
}

bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
  DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  DCHECK_EQ(state_, UNINITIALIZED);
  DCHECK(source);
  DCHECK(!sink_.get());
  DCHECK(!source_);

  // Use stereo output on all platforms except Android.
  media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;
#if defined(OS_ANDROID)
  DVLOG(1) << "Using mono audio output for Android";
  channel_layout = media::CHANNEL_LAYOUT_MONO;
#endif
  // Ask the renderer for the default audio output hardware sample-rate.
  media::AudioHardwareConfig* hardware_config =
      RenderThreadImpl::current()->GetAudioHardwareConfig();
  int sample_rate = hardware_config->GetOutputSampleRate();
  DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;

  // WebRTC does not yet support rates higher than 96000 on the client side,
  // and 48000 is the preferred sample rate. Therefore, if 192000 is detected,
  // we change the rate to 48000 instead. The consequence is that the native
  // layer will be opened up at 192 kHz but WebRTC will provide data at
  // 48 kHz, which is then resampled by the audio converter on the browser
  // side to match the native audio layer.
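  // For example, each 10 ms block that WebRTC renders at 48 kHz (480 frames)
  // must be upsampled by a factor of four to fill the 1920 frames that a
  // 192 kHz device consumes in the same 10 ms.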
  if (sample_rate == 192000) {
    DVLOG(1) << "Resampling from 48000 to 192000 is required";
    sample_rate = 48000;
  }
  media::AudioSampleRate asr = media::AsAudioSampleRate(sample_rate);
  if (asr != media::kUnexpectedAudioSampleRate) {
    UMA_HISTOGRAM_ENUMERATION(
        "WebRTC.AudioOutputSampleRate", asr, media::kUnexpectedAudioSampleRate);
  } else {
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected", sample_rate);
  }

  // Verify that the reported output hardware sample rate is supported
  // on the current platform.
  if (std::find(kValidOutputRates,
                kValidOutputRates + arraysize(kValidOutputRates),
                sample_rate) ==
      kValidOutputRates + arraysize(kValidOutputRates)) {
    DLOG(ERROR) << sample_rate << " is not a supported output rate.";
    return false;
  }
  // Set up audio parameters for the source, i.e., the WebRTC client.

  // The WebRTC client only supports multiples of 10 ms as buffer size, where
  // 10 ms is preferred for the lowest possible delay.
  media::AudioParameters source_params;
  int buffer_size = (sample_rate / 100);
  DVLOG(1) << "Using WebRTC output buffer size: " << buffer_size;
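  // For example, at 48000 Hz the division above yields 480 frames, i.e. one
  // 10 ms chunk of audio per render callback.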

  int channels = ChannelLayoutToChannelCount(channel_layout);
  source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                      channel_layout, channels, 0,
                      sample_rate, 16, buffer_size);

  // Set up audio parameters for the sink, i.e., the native audio output
  // stream. We strive to open the stream using the native parameters to
  // achieve the best possible performance and to ensure that no FIFO is
  // needed on the browser side to match the client request. Any mismatch
  // between the source and the sink is instead handled in this class using a
  // pull FIFO.

  media::AudioParameters sink_params;

#if defined(OS_ANDROID)
  buffer_size = kDefaultOutputBufferSize;
#else
  buffer_size = hardware_config->GetOutputBufferSize();
#endif

  sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                    channel_layout, channels, 0, sample_rate, 16, buffer_size);

  // Create a FIFO if re-buffering is required to match the source input with
  // the sink request. The source acts as the provider here and the sink as
  // the consumer.
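  // For example, if the sink asks for 2048 frames per callback (the Android
  // default above; hardware-dependent elsewhere) while the source produces
  // 480-frame (10 ms) chunks, the FIFO satisfies each sink request by
  // pulling from the source several times per callback.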
  fifo_delay_milliseconds_ = 0;
  if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
    DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
             << " to " << sink_params.frames_per_buffer();
    audio_fifo_.reset(new media::AudioPullFifo(
        source_params.channels(),
        source_params.frames_per_buffer(),
        base::Bind(
            &WebRtcAudioRenderer::SourceCallback,
            base::Unretained(this))));

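    // The FIFO adds delay when the sink pulls larger buffers than the source
    // provides. As an illustration, with the 2048-frame Android default sink
    // buffer and 480-frame source buffers at 48000 Hz, the added delay is
    // (2048 - 480) * 1000 / 48000 = 32 ms (integer division).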
    if (sink_params.frames_per_buffer() > source_params.frames_per_buffer()) {
      // Multiply before dividing to avoid truncating the sub-millisecond
      // frame duration to zero in integer arithmetic.
      fifo_delay_milliseconds_ =
          (sink_params.frames_per_buffer() -
           source_params.frames_per_buffer()) *
          base::Time::kMillisecondsPerSecond / source_params.sample_rate();
    }
  }

  // Allocate local audio buffers based on the parameters above.
  // It is assumed that each audio sample contains 16 bits and each
  // audio frame contains one or two audio samples depending on the
  // number of channels.
  buffer_.reset(
      new int16[source_params.frames_per_buffer() * source_params.channels()]);
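  // For example, a stereo source at 48000 Hz needs 480 * 2 = 960 int16
  // values (1920 bytes) per 10 ms buffer.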

  source_ = source;
  source->SetRenderFormat(source_params);

  // Configure the audio rendering client and start rendering.
  sink_ = AudioDeviceFactory::NewOutputDevice(source_render_view_id_);
  sink_->Initialize(sink_params, this);
  sink_->Start();

  // User must call Play() before any audio can be heard.
  state_ = PAUSED;

  UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
                            source_params.channel_layout(),
                            media::CHANNEL_LAYOUT_MAX);
  AddHistogramFramesPerBuffer(source_params.frames_per_buffer());

  return true;
}

void WebRtcAudioRenderer::Start() {
  // TODO(xians): refactor to make usage of Start/Stop more symmetric.
  NOTIMPLEMENTED();
}

void WebRtcAudioRenderer::Play() {
  DVLOG(1) << "WebRtcAudioRenderer::Play()";
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
  ++play_ref_count_;
  state_ = PLAYING;

  if (audio_fifo_) {
    audio_delay_milliseconds_ = 0;
    audio_fifo_->Clear();
  }
}

void WebRtcAudioRenderer::Pause() {
  DVLOG(1) << "WebRtcAudioRenderer::Pause()";
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  DCHECK_EQ(state_, PLAYING);
  DCHECK_GT(play_ref_count_, 0);
  if (!--play_ref_count_)
    state_ = PAUSED;
}

void WebRtcAudioRenderer::Stop() {
  DVLOG(1) << "WebRtcAudioRenderer::Stop()";
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  source_->RemoveAudioRenderer(this);
  source_ = NULL;
  sink_->Stop();
  state_ = UNINITIALIZED;
}

void WebRtcAudioRenderer::SetVolume(float volume) {
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  sink_->SetVolume(volume);
}

base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
  return base::TimeDelta();
}

bool WebRtcAudioRenderer::IsLocalRenderer() const {
  return false;
}

int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
                                int audio_delay_milliseconds) {
  base::AutoLock auto_lock(lock_);
  if (!source_)
    return 0;

  DVLOG(2) << "WebRtcAudioRenderer::Render()";
  DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds;

  audio_delay_milliseconds_ = audio_delay_milliseconds;

  if (audio_fifo_)
    audio_fifo_->Consume(audio_bus, audio_bus->frames());
  else
    SourceCallback(0, audio_bus);

  return (state_ == PLAYING) ? audio_bus->frames() : 0;
}

void WebRtcAudioRenderer::OnRenderError() {
  NOTIMPLEMENTED();
  LOG(ERROR) << "OnRenderError()";
}

// Called by AudioPullFifo when more data is necessary.
void WebRtcAudioRenderer::SourceCallback(
    int fifo_frame_delay, media::AudioBus* audio_bus) {
  DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
           << fifo_frame_delay << ", "
           << audio_bus->frames() << ")";

  int output_delay_milliseconds = audio_delay_milliseconds_;
  output_delay_milliseconds += fifo_delay_milliseconds_;
  DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds;

  // We need to keep pulling render data from |source_| regardless of
  // |state_|; otherwise the data will be buffered up inside |source_|.
  source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
                      audio_bus->channels(), audio_bus->frames(),
                      output_delay_milliseconds);

  // Avoid filling up the audio bus if we are not playing; instead
  // return here and ensure that the returned value in Render() is 0.
  if (state_ != PLAYING) {
    audio_bus->Zero();
    return;
  }

  // De-interleave each channel and convert to 32-bit floating-point
  // with nominal range -1.0 -> +1.0 to match the callback format.
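  // For a stereo buffer the interleaved int16 data is laid out as
  // L0 R0 L1 R1 ..., and is split here into one planar float channel per
  // speaker.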
354  audio_bus->FromInterleaved(buffer_.get(),
355                             audio_bus->frames(),
356                             sizeof(buffer_[0]));
357}
358
359}  // namespace content
360