1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/renderer/media/webrtc_audio_capturer.h"
6
7#include "base/bind.h"
8#include "base/logging.h"
9#include "base/metrics/histogram.h"
10#include "base/strings/string_util.h"
11#include "base/strings/stringprintf.h"
12#include "content/child/child_process.h"
13#include "content/renderer/media/audio_device_factory.h"
14#include "content/renderer/media/media_stream_audio_processor.h"
15#include "content/renderer/media/media_stream_audio_processor_options.h"
16#include "content/renderer/media/media_stream_audio_source.h"
17#include "content/renderer/media/webrtc_audio_device_impl.h"
18#include "content/renderer/media/webrtc_local_audio_track.h"
19#include "content/renderer/media/webrtc_logging.h"
20#include "media/audio/sample_rates.h"
21
22namespace content {
23
namespace {

// Hardware sample rates that are accepted on the capture (input) side.
// WebRtcAudioCapturer::Initialize() refuses to continue when the device
// reports a rate that is not listed for the current platform.
#if defined(OS_WIN) || defined(OS_MACOSX)
// On Windows and Mac OS X the audio layer reports the sample rate currently
// selected by the user, so a wide set of rates is accepted there. Any rate
// outside this list is rejected.
const int kValidInputRates[] = {
    192000, 96000, 48000, 44100, 32000, 16000, 8000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD) || defined(OS_ANDROID)
const int kValidInputRates[] = {48000, 44100};
#else
const int kValidInputRates[] = {44100};
#endif

// Time constant, in milliseconds, handed to the AudioPowerMonitor
// constructor (see the comments on that constructor for the semantics).
// The value was chosen arbitrarily, but seems to work well.
const int kPowerMonitorTimeConstantMs = 10;

// Interval, in seconds, that must have elapsed since the previous audio power
// level log entry before Capture() writes the next one.
const int kPowerMonitorLogIntervalSeconds = 10;

}  // namespace
50
51// Reference counted container of WebRtcLocalAudioTrack delegate.
52// TODO(xians): Switch to MediaStreamAudioSinkOwner.
53class WebRtcAudioCapturer::TrackOwner
54    : public base::RefCountedThreadSafe<WebRtcAudioCapturer::TrackOwner> {
55 public:
56  explicit TrackOwner(WebRtcLocalAudioTrack* track)
57      : delegate_(track) {}
58
59  void Capture(const int16* audio_data,
60               base::TimeDelta delay,
61               double volume,
62               bool key_pressed,
63               bool need_audio_processing) {
64    base::AutoLock lock(lock_);
65    if (delegate_) {
66      delegate_->Capture(audio_data,
67                         delay,
68                         volume,
69                         key_pressed,
70                         need_audio_processing);
71    }
72  }
73
74  void OnSetFormat(const media::AudioParameters& params) {
75    base::AutoLock lock(lock_);
76    if (delegate_)
77      delegate_->OnSetFormat(params);
78  }
79
80  void SetAudioProcessor(
81      const scoped_refptr<MediaStreamAudioProcessor>& processor) {
82    base::AutoLock lock(lock_);
83    if (delegate_)
84      delegate_->SetAudioProcessor(processor);
85  }
86
87  void Reset() {
88    base::AutoLock lock(lock_);
89    delegate_ = NULL;
90  }
91
92  void Stop() {
93    base::AutoLock lock(lock_);
94    DCHECK(delegate_);
95
96    // This can be reentrant so reset |delegate_| before calling out.
97    WebRtcLocalAudioTrack* temp = delegate_;
98    delegate_ = NULL;
99    temp->Stop();
100  }
101
102  // Wrapper which allows to use std::find_if() when adding and removing
103  // sinks to/from the list.
104  struct TrackWrapper {
105    TrackWrapper(WebRtcLocalAudioTrack* track) : track_(track) {}
106    bool operator()(
107        const scoped_refptr<WebRtcAudioCapturer::TrackOwner>& owner) const {
108      return owner->IsEqual(track_);
109    }
110    WebRtcLocalAudioTrack* track_;
111  };
112
113 protected:
114  virtual ~TrackOwner() {}
115
116 private:
117  friend class base::RefCountedThreadSafe<WebRtcAudioCapturer::TrackOwner>;
118
119  bool IsEqual(const WebRtcLocalAudioTrack* other) const {
120    base::AutoLock lock(lock_);
121    return (other == delegate_);
122  }
123
124  // Do NOT reference count the |delegate_| to avoid cyclic reference counting.
125  WebRtcLocalAudioTrack* delegate_;
126  mutable base::Lock lock_;
127
128  DISALLOW_COPY_AND_ASSIGN(TrackOwner);
129};
130
131// static
132scoped_refptr<WebRtcAudioCapturer> WebRtcAudioCapturer::CreateCapturer(
133    int render_view_id, const StreamDeviceInfo& device_info,
134    const blink::WebMediaConstraints& constraints,
135    WebRtcAudioDeviceImpl* audio_device,
136    MediaStreamAudioSource* audio_source) {
137  scoped_refptr<WebRtcAudioCapturer> capturer = new WebRtcAudioCapturer(
138      render_view_id, device_info, constraints, audio_device, audio_source);
139  if (capturer->Initialize())
140    return capturer;
141
142  return NULL;
143}
144
145bool WebRtcAudioCapturer::Initialize() {
146  DCHECK(thread_checker_.CalledOnValidThread());
147  DVLOG(1) << "WebRtcAudioCapturer::Initialize()";
148  WebRtcLogMessage(base::StringPrintf(
149      "WAC::Initialize. render_view_id=%d"
150      ", channel_layout=%d, sample_rate=%d, buffer_size=%d"
151      ", session_id=%d, paired_output_sample_rate=%d"
152      ", paired_output_frames_per_buffer=%d, effects=%d. ",
153      render_view_id_,
154      device_info_.device.input.channel_layout,
155      device_info_.device.input.sample_rate,
156      device_info_.device.input.frames_per_buffer,
157      device_info_.session_id,
158      device_info_.device.matched_output.sample_rate,
159      device_info_.device.matched_output.frames_per_buffer,
160      device_info_.device.input.effects));
161
162  if (render_view_id_ == -1) {
163    // Return true here to allow injecting a new source via
164    // SetCapturerSourceForTesting() at a later state.
165    return true;
166  }
167
168  MediaAudioConstraints audio_constraints(constraints_,
169                                          device_info_.device.input.effects);
170  if (!audio_constraints.IsValid())
171    return false;
172
173  media::ChannelLayout channel_layout = static_cast<media::ChannelLayout>(
174      device_info_.device.input.channel_layout);
175  DVLOG(1) << "Audio input hardware channel layout: " << channel_layout;
176  UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioInputChannelLayout",
177                            channel_layout, media::CHANNEL_LAYOUT_MAX + 1);
178
179  // Verify that the reported input channel configuration is supported.
180  if (channel_layout != media::CHANNEL_LAYOUT_MONO &&
181      channel_layout != media::CHANNEL_LAYOUT_STEREO &&
182      channel_layout != media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC) {
183    DLOG(ERROR) << channel_layout
184                << " is not a supported input channel configuration.";
185    return false;
186  }
187
188  DVLOG(1) << "Audio input hardware sample rate: "
189           << device_info_.device.input.sample_rate;
190  media::AudioSampleRate asr;
191  if (media::ToAudioSampleRate(device_info_.device.input.sample_rate, &asr)) {
192    UMA_HISTOGRAM_ENUMERATION(
193        "WebRTC.AudioInputSampleRate", asr, media::kAudioSampleRateMax + 1);
194  } else {
195    UMA_HISTOGRAM_COUNTS("WebRTC.AudioInputSampleRateUnexpected",
196                         device_info_.device.input.sample_rate);
197  }
198
199  // Verify that the reported input hardware sample rate is supported
200  // on the current platform.
201  if (std::find(&kValidInputRates[0],
202                &kValidInputRates[0] + arraysize(kValidInputRates),
203                device_info_.device.input.sample_rate) ==
204          &kValidInputRates[arraysize(kValidInputRates)]) {
205    DLOG(ERROR) << device_info_.device.input.sample_rate
206                << " is not a supported input rate.";
207    return false;
208  }
209
210  // Create and configure the default audio capturing source.
211  SetCapturerSource(AudioDeviceFactory::NewInputDevice(render_view_id_),
212                    channel_layout,
213                    static_cast<float>(device_info_.device.input.sample_rate));
214
215  // Add the capturer to the WebRtcAudioDeviceImpl since it needs some hardware
216  // information from the capturer.
217  if (audio_device_)
218    audio_device_->AddAudioCapturer(this);
219
220  return true;
221}
222
// Stores the construction arguments and creates the audio processor and the
// audio power monitor. No capture source is created here; that is done by
// Initialize(). The capturer starts out not running, outside peer connection
// mode and with a volume of 0.
WebRtcAudioCapturer::WebRtcAudioCapturer(
    int render_view_id,
    const StreamDeviceInfo& device_info,
    const blink::WebMediaConstraints& constraints,
    WebRtcAudioDeviceImpl* audio_device,
    MediaStreamAudioSource* audio_source)
    : constraints_(constraints),
      audio_processor_(
          new talk_base::RefCountedObject<MediaStreamAudioProcessor>(
              constraints, device_info.device.input.effects, audio_device)),
      running_(false),
      render_view_id_(render_view_id),
      device_info_(device_info),
      volume_(0),
      peer_connection_mode_(false),
      key_pressed_(false),
      need_audio_processing_(false),
      audio_device_(audio_device),
      audio_source_(audio_source),
      // NOTE(review): this reads the |device_info_| member rather than the
      // |device_info| parameter, so it relies on |device_info_| being
      // declared before |audio_power_monitor_| in the header -- confirm.
      audio_power_monitor_(
          device_info_.device.input.sample_rate,
          base::TimeDelta::FromMilliseconds(kPowerMonitorTimeConstantMs)) {
  DVLOG(1) << "WebRtcAudioCapturer::WebRtcAudioCapturer()";
}
247
// Must run on the thread bound to |thread_checker_|, and only after every
// track has been removed (both are checked in debug builds).
WebRtcAudioCapturer::~WebRtcAudioCapturer() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK(tracks_.IsEmpty());
  DVLOG(1) << "WebRtcAudioCapturer::~WebRtcAudioCapturer()";
  // Stop() returns immediately when the capturer is not running.
  Stop();
}
254
255void WebRtcAudioCapturer::AddTrack(WebRtcLocalAudioTrack* track) {
256  DCHECK(track);
257  DVLOG(1) << "WebRtcAudioCapturer::AddTrack()";
258
259  {
260    base::AutoLock auto_lock(lock_);
261    // Verify that |track| is not already added to the list.
262    DCHECK(!tracks_.Contains(TrackOwner::TrackWrapper(track)));
263
264    // Add with a tag, so we remember to call OnSetFormat() on the new
265    // track.
266    scoped_refptr<TrackOwner> track_owner(new TrackOwner(track));
267    tracks_.AddAndTag(track_owner);
268  }
269}
270
271void WebRtcAudioCapturer::RemoveTrack(WebRtcLocalAudioTrack* track) {
272  DCHECK(thread_checker_.CalledOnValidThread());
273  DVLOG(1) << "WebRtcAudioCapturer::RemoveTrack()";
274  bool stop_source = false;
275  {
276    base::AutoLock auto_lock(lock_);
277
278    scoped_refptr<TrackOwner> removed_item =
279        tracks_.Remove(TrackOwner::TrackWrapper(track));
280
281    // Clear the delegate to ensure that no more capture callbacks will
282    // be sent to this sink. Also avoids a possible crash which can happen
283    // if this method is called while capturing is active.
284    if (removed_item.get()) {
285      removed_item->Reset();
286      stop_source = tracks_.IsEmpty();
287    }
288  }
289  if (stop_source) {
290    // Since WebRtcAudioCapturer does not inherit MediaStreamAudioSource,
291    // and instead MediaStreamAudioSource is composed of a WebRtcAudioCapturer,
292    // we have to call StopSource on the MediaStreamSource. This will call
293    // MediaStreamAudioSource::DoStopSource which in turn call
294    // WebRtcAudioCapturerer::Stop();
295    audio_source_->StopSource();
296  }
297}
298
299void WebRtcAudioCapturer::SetCapturerSource(
300    const scoped_refptr<media::AudioCapturerSource>& source,
301    media::ChannelLayout channel_layout,
302    float sample_rate) {
303  DCHECK(thread_checker_.CalledOnValidThread());
304  DVLOG(1) << "SetCapturerSource(channel_layout=" << channel_layout << ","
305           << "sample_rate=" << sample_rate << ")";
306  scoped_refptr<media::AudioCapturerSource> old_source;
307  {
308    base::AutoLock auto_lock(lock_);
309    if (source_.get() == source.get())
310      return;
311
312    source_.swap(old_source);
313    source_ = source;
314
315    // Reset the flag to allow starting the new source.
316    running_ = false;
317  }
318
319  DVLOG(1) << "Switching to a new capture source.";
320  if (old_source.get())
321    old_source->Stop();
322
323  // Dispatch the new parameters both to the sink(s) and to the new source,
324  // also apply the new |constraints|.
325  // The idea is to get rid of any dependency of the microphone parameters
326  // which would normally be used by default.
327  // bits_per_sample is always 16 for now.
328  int buffer_size = GetBufferSize(sample_rate);
329  media::AudioParameters params(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
330                                channel_layout, 0, sample_rate,
331                                16, buffer_size,
332                                device_info_.device.input.effects);
333
334  {
335    base::AutoLock auto_lock(lock_);
336    // Notify the |audio_processor_| of the new format.
337    audio_processor_->OnCaptureFormatChanged(params);
338
339    MediaAudioConstraints audio_constraints(constraints_,
340                                            device_info_.device.input.effects);
341    need_audio_processing_ = audio_constraints.NeedsAudioProcessing();
342    // Notify all tracks about the new format.
343    tracks_.TagAll();
344  }
345
346  if (source.get())
347    source->Initialize(params, this, session_id());
348
349  Start();
350}
351
352void WebRtcAudioCapturer::EnablePeerConnectionMode() {
353  DCHECK(thread_checker_.CalledOnValidThread());
354  DVLOG(1) << "EnablePeerConnectionMode";
355  // Do nothing if the peer connection mode has been enabled.
356  if (peer_connection_mode_)
357    return;
358
359  peer_connection_mode_ = true;
360  int render_view_id = -1;
361  media::AudioParameters input_params;
362  {
363    base::AutoLock auto_lock(lock_);
364    // Simply return if there is no existing source or the |render_view_id_| is
365    // not valid.
366    if (!source_.get() || render_view_id_== -1)
367      return;
368
369    render_view_id = render_view_id_;
370    input_params = audio_processor_->InputFormat();
371  }
372
373  // Do nothing if the current buffer size is the WebRtc native buffer size.
374  if (GetBufferSize(input_params.sample_rate()) ==
375          input_params.frames_per_buffer()) {
376    return;
377  }
378
379  // Create a new audio stream as source which will open the hardware using
380  // WebRtc native buffer size.
381  SetCapturerSource(AudioDeviceFactory::NewInputDevice(render_view_id),
382                    input_params.channel_layout(),
383                    static_cast<float>(input_params.sample_rate()));
384}
385
386void WebRtcAudioCapturer::Start() {
387  DCHECK(thread_checker_.CalledOnValidThread());
388  DVLOG(1) << "WebRtcAudioCapturer::Start()";
389  base::AutoLock auto_lock(lock_);
390  if (running_ || !source_)
391    return;
392
393  // Start the data source, i.e., start capturing data from the current source.
394  // We need to set the AGC control before starting the stream.
395  source_->SetAutomaticGainControl(true);
396  source_->Start();
397  running_ = true;
398}
399
400void WebRtcAudioCapturer::Stop() {
401  DCHECK(thread_checker_.CalledOnValidThread());
402  DVLOG(1) << "WebRtcAudioCapturer::Stop()";
403  scoped_refptr<media::AudioCapturerSource> source;
404  TrackList::ItemList tracks;
405  {
406    base::AutoLock auto_lock(lock_);
407    if (!running_)
408      return;
409
410    source = source_;
411    tracks = tracks_.Items();
412    tracks_.Clear();
413    running_ = false;
414  }
415
416  // Remove the capturer object from the WebRtcAudioDeviceImpl.
417  if (audio_device_)
418    audio_device_->RemoveAudioCapturer(this);
419
420  for (TrackList::ItemList::const_iterator it = tracks.begin();
421       it != tracks.end();
422       ++it) {
423    (*it)->Stop();
424  }
425
426  if (source.get())
427    source->Stop();
428
429  // Stop the audio processor to avoid feeding render data into the processor.
430  audio_processor_->Stop();
431}
432
433void WebRtcAudioCapturer::SetVolume(int volume) {
434  DVLOG(1) << "WebRtcAudioCapturer::SetVolume()";
435  DCHECK_LE(volume, MaxVolume());
436  double normalized_volume = static_cast<double>(volume) / MaxVolume();
437  base::AutoLock auto_lock(lock_);
438  if (source_.get())
439    source_->SetVolume(normalized_volume);
440}
441
// Returns the volume most recently recorded by Capture() (0 until the first
// capture callback). The stored value is not clamped to MaxVolume().
int WebRtcAudioCapturer::Volume() const {
  base::AutoLock auto_lock(lock_);
  return volume_;
}
446
// Returns the upper end of the integer volume range, as defined by
// WebRtcAudioDeviceImpl::kMaxVolumeLevel.
int WebRtcAudioCapturer::MaxVolume() const {
  return WebRtcAudioDeviceImpl::kMaxVolumeLevel;
}
450
451void WebRtcAudioCapturer::Capture(const media::AudioBus* audio_source,
452                                  int audio_delay_milliseconds,
453                                  double volume,
454                                  bool key_pressed) {
455// This callback is driven by AudioInputDevice::AudioThreadCallback if
456// |source_| is AudioInputDevice, otherwise it is driven by client's
457// CaptureCallback.
458#if defined(OS_WIN) || defined(OS_MACOSX)
459  DCHECK_LE(volume, 1.0);
460#elif (defined(OS_LINUX) && !defined(OS_CHROMEOS)) || defined(OS_OPENBSD)
461  // We have a special situation on Linux where the microphone volume can be
462  // "higher than maximum". The input volume slider in the sound preference
463  // allows the user to set a scaling that is higher than 100%. It means that
464  // even if the reported maximum levels is N, the actual microphone level can
465  // go up to 1.5x*N and that corresponds to a normalized |volume| of 1.5x.
466  DCHECK_LE(volume, 1.6);
467#endif
468
469  TrackList::ItemList tracks;
470  TrackList::ItemList tracks_to_notify_format;
471  int current_volume = 0;
472  base::TimeDelta audio_delay;
473  bool need_audio_processing = true;
474  {
475    base::AutoLock auto_lock(lock_);
476    if (!running_)
477      return;
478
479    // Map internal volume range of [0.0, 1.0] into [0, 255] used by AGC.
480    // The volume can be higher than 255 on Linux, and it will be cropped to
481    // 255 since AGC does not allow values out of range.
482    volume_ = static_cast<int>((volume * MaxVolume()) + 0.5);
483    current_volume = volume_ > MaxVolume() ? MaxVolume() : volume_;
484    audio_delay = base::TimeDelta::FromMilliseconds(audio_delay_milliseconds);
485    audio_delay_ = audio_delay;
486    key_pressed_ = key_pressed;
487    tracks = tracks_.Items();
488    tracks_.RetrieveAndClearTags(&tracks_to_notify_format);
489
490    // Set the flag to turn on the audio processing in PeerConnection level.
491    // Note that, we turn off the audio processing in PeerConnection if the
492    // processor has already processed the data.
493    need_audio_processing = need_audio_processing_ ?
494        !MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() : false;
495  }
496
497  DCHECK(audio_processor_->InputFormat().IsValid());
498  DCHECK_EQ(audio_source->channels(),
499            audio_processor_->InputFormat().channels());
500  DCHECK_EQ(audio_source->frames(),
501            audio_processor_->InputFormat().frames_per_buffer());
502
503  // Notify the tracks on when the format changes. This will do nothing if
504  // |tracks_to_notify_format| is empty.
505  media::AudioParameters output_params = audio_processor_->OutputFormat();
506  for (TrackList::ItemList::const_iterator it = tracks_to_notify_format.begin();
507       it != tracks_to_notify_format.end(); ++it) {
508    (*it)->OnSetFormat(output_params);
509    (*it)->SetAudioProcessor(audio_processor_);
510  }
511
512  if ((base::TimeTicks::Now() - last_audio_level_log_time_).InSeconds() >
513          kPowerMonitorLogIntervalSeconds) {
514    audio_power_monitor_.Scan(*audio_source, audio_source->frames());
515
516    last_audio_level_log_time_ = base::TimeTicks::Now();
517
518    std::pair<float, bool> result =
519        audio_power_monitor_.ReadCurrentPowerAndClip();
520    WebRtcLogMessage(base::StringPrintf(
521        "WAC::Capture: current_audio_power=%.2fdBFS.", result.first));
522
523    audio_power_monitor_.Reset();
524  }
525
526  // Push the data to the processor for processing.
527  audio_processor_->PushCaptureData(audio_source);
528
529  // Process and consume the data in the processor until there is not enough
530  // data in the processor.
531  int16* output = NULL;
532  int new_volume = 0;
533  while (audio_processor_->ProcessAndConsumeData(
534      audio_delay, current_volume, key_pressed, &new_volume, &output)) {
535    // Feed the post-processed data to the tracks.
536    for (TrackList::ItemList::const_iterator it = tracks.begin();
537         it != tracks.end(); ++it) {
538      (*it)->Capture(output, audio_delay, current_volume, key_pressed,
539                     need_audio_processing);
540    }
541
542    if (new_volume) {
543      SetVolume(new_volume);
544
545      // Update the |current_volume| to avoid passing the old volume to AGC.
546      current_volume = new_volume;
547    }
548  }
549}
550
// Capture error notification. Errors are not handled yet; the body only
// invokes NOTIMPLEMENTED().
void WebRtcAudioCapturer::OnCaptureError() {
  NOTIMPLEMENTED();
}
554
555media::AudioParameters WebRtcAudioCapturer::source_audio_parameters() const {
556  base::AutoLock auto_lock(lock_);
557  return audio_processor_ ?
558      audio_processor_->InputFormat() : media::AudioParameters();
559}
560
561bool WebRtcAudioCapturer::GetPairedOutputParameters(
562    int* session_id,
563    int* output_sample_rate,
564    int* output_frames_per_buffer) const {
565  // Don't set output parameters unless all of them are valid.
566  if (device_info_.session_id <= 0 ||
567      !device_info_.device.matched_output.sample_rate ||
568      !device_info_.device.matched_output.frames_per_buffer)
569    return false;
570
571  *session_id = device_info_.session_id;
572  *output_sample_rate = device_info_.device.matched_output.sample_rate;
573  *output_frames_per_buffer =
574      device_info_.device.matched_output.frames_per_buffer;
575
576  return true;
577}
578
579int WebRtcAudioCapturer::GetBufferSize(int sample_rate) const {
580  DCHECK(thread_checker_.CalledOnValidThread());
581#if defined(OS_ANDROID)
582  // TODO(henrika): Tune and adjust buffer size on Android.
583  return (2 * sample_rate / 100);
584#endif
585
586  // PeerConnection is running at a buffer size of 10ms data. A multiple of
587  // 10ms as the buffer size can give the best performance to PeerConnection.
588  int peer_connection_buffer_size = sample_rate / 100;
589
590  // Use the native hardware buffer size in non peer connection mode when the
591  // platform is using a native buffer size smaller than the PeerConnection
592  // buffer size.
593  int hardware_buffer_size = device_info_.device.input.frames_per_buffer;
594  if (!peer_connection_mode_ && hardware_buffer_size &&
595      hardware_buffer_size <= peer_connection_buffer_size) {
596    return hardware_buffer_size;
597  }
598
599  return (sample_rate / 100);
600}
601
// Copies the delay, volume and key-press state most recently recorded by
// Capture() into the output arguments while holding |lock_|. All three
// pointers are dereferenced unconditionally, so none of them may be null.
void WebRtcAudioCapturer::GetAudioProcessingParams(
    base::TimeDelta* delay, int* volume, bool* key_pressed) {
  base::AutoLock auto_lock(lock_);
  *delay = audio_delay_;
  *volume = volume_;
  *key_pressed = key_pressed_;
}
609
610void WebRtcAudioCapturer::SetCapturerSourceForTesting(
611    const scoped_refptr<media::AudioCapturerSource>& source,
612    media::AudioParameters params) {
613  // Create a new audio stream as source which uses the new source.
614  SetCapturerSource(source, params.channel_layout(),
615                    static_cast<float>(params.sample_rate()));
616}
617
618}  // namespace content
619