// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/webrtc_audio_capturer.h"

#include <algorithm>
#include <utility>

#include "base/bind.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "content/child/child_process.h"
#include "content/renderer/media/audio_device_factory.h"
#include "content/renderer/media/media_stream_audio_processor.h"
#include "content/renderer/media/media_stream_audio_processor_options.h"
#include "content/renderer/media/media_stream_audio_source.h"
#include "content/renderer/media/webrtc_audio_device_impl.h"
#include "content/renderer/media/webrtc_local_audio_track.h"
#include "content/renderer/media/webrtc_logging.h"
#include "media/audio/sample_rates.h"

namespace content {

namespace {

// Supported hardware sample rates for input and output sides.
#if defined(OS_WIN) || defined(OS_MACOSX)
// media::GetAudioInputHardwareSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X. The rates
// listed below add restrictions, and WebRtcAudioDeviceImpl::Init() will fail
// if the user selects any rate outside these ranges.
const int kValidInputRates[] =
    {192000, 96000, 48000, 44100, 32000, 16000, 8000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidInputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
const int kValidInputRates[] = {48000, 44100};
#else
const int kValidInputRates[] = {44100};
#endif

// Time constant for AudioPowerMonitor. See the AudioPowerMonitor ctor comments
// for semantics. This value was arbitrarily chosen, but seems to work well.
const int kPowerMonitorTimeConstantMs = 10;

// The minimum time between two audio power level log entries.
const int kPowerMonitorLogIntervalSeconds = 10;

}  // namespace

// Reference counted container of the WebRtcLocalAudioTrack delegate.
// TODO(xians): Switch to MediaStreamAudioSinkOwner.
class WebRtcAudioCapturer::TrackOwner
    : public base::RefCountedThreadSafe<WebRtcAudioCapturer::TrackOwner> {
 public:
  explicit TrackOwner(WebRtcLocalAudioTrack* track)
      : delegate_(track) {}

  void Capture(const int16* audio_data,
               base::TimeDelta delay,
               double volume,
               bool key_pressed,
               bool need_audio_processing) {
    base::AutoLock lock(lock_);
    if (delegate_) {
      delegate_->Capture(audio_data,
                         delay,
                         volume,
                         key_pressed,
                         need_audio_processing);
    }
  }

  void OnSetFormat(const media::AudioParameters& params) {
    base::AutoLock lock(lock_);
    if (delegate_)
      delegate_->OnSetFormat(params);
  }

  void SetAudioProcessor(
      const scoped_refptr<MediaStreamAudioProcessor>& processor) {
    base::AutoLock lock(lock_);
    if (delegate_)
      delegate_->SetAudioProcessor(processor);
  }

  void Reset() {
    base::AutoLock lock(lock_);
    delegate_ = NULL;
  }

  void Stop() {
    base::AutoLock lock(lock_);
    DCHECK(delegate_);

    // This can be reentrant so reset |delegate_| before calling out.
    WebRtcLocalAudioTrack* temp = delegate_;
    delegate_ = NULL;
    temp->Stop();
  }

  // Wrapper which allows using std::find_if() when adding and removing
  // sinks to/from the list.
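  // Used by tracks_.Contains() in AddTrack() and tracks_.Remove() in
  // RemoveTrack() below.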
  struct TrackWrapper {
    explicit TrackWrapper(WebRtcLocalAudioTrack* track) : track_(track) {}
    bool operator()(
        const scoped_refptr<WebRtcAudioCapturer::TrackOwner>& owner) const {
      return owner->IsEqual(track_);
    }
    WebRtcLocalAudioTrack* track_;
  };

 protected:
  virtual ~TrackOwner() {}

 private:
  friend class base::RefCountedThreadSafe<WebRtcAudioCapturer::TrackOwner>;

  bool IsEqual(const WebRtcLocalAudioTrack* other) const {
    base::AutoLock lock(lock_);
    return (other == delegate_);
  }

  // Do NOT reference count the |delegate_| to avoid cyclic reference counting.
  WebRtcLocalAudioTrack* delegate_;
  mutable base::Lock lock_;

  DISALLOW_COPY_AND_ASSIGN(TrackOwner);
};

// static
scoped_refptr<WebRtcAudioCapturer> WebRtcAudioCapturer::CreateCapturer(
    int render_view_id, const StreamDeviceInfo& device_info,
    const blink::WebMediaConstraints& constraints,
    WebRtcAudioDeviceImpl* audio_device,
    MediaStreamAudioSource* audio_source) {
  scoped_refptr<WebRtcAudioCapturer> capturer = new WebRtcAudioCapturer(
      render_view_id, device_info, constraints, audio_device, audio_source);
  if (capturer->Initialize())
    return capturer;

  return NULL;
}

bool WebRtcAudioCapturer::Initialize() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DVLOG(1) << "WebRtcAudioCapturer::Initialize()";
  WebRtcLogMessage(base::StringPrintf(
      "WAC::Initialize. render_view_id=%d"
      ", channel_layout=%d, sample_rate=%d, buffer_size=%d"
      ", session_id=%d, paired_output_sample_rate=%d"
      ", paired_output_frames_per_buffer=%d, effects=%d. ",
      render_view_id_,
      device_info_.device.input.channel_layout,
      device_info_.device.input.sample_rate,
      device_info_.device.input.frames_per_buffer,
      device_info_.session_id,
      device_info_.device.matched_output.sample_rate,
      device_info_.device.matched_output.frames_per_buffer,
      device_info_.device.input.effects));

  if (render_view_id_ == -1) {
    // Return true here to allow injecting a new source via
    // SetCapturerSourceForTesting() at a later stage.
    return true;
  }

  MediaAudioConstraints audio_constraints(constraints_,
                                          device_info_.device.input.effects);
  if (!audio_constraints.IsValid())
    return false;

  media::ChannelLayout channel_layout = static_cast<media::ChannelLayout>(
      device_info_.device.input.channel_layout);
  DVLOG(1) << "Audio input hardware channel layout: " << channel_layout;
  UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioInputChannelLayout",
                            channel_layout, media::CHANNEL_LAYOUT_MAX + 1);

  // Verify that the reported input channel configuration is supported.
  if (channel_layout != media::CHANNEL_LAYOUT_MONO &&
      channel_layout != media::CHANNEL_LAYOUT_STEREO &&
      channel_layout != media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC) {
    DLOG(ERROR) << channel_layout
                << " is not a supported input channel configuration.";
    return false;
  }

  DVLOG(1) << "Audio input hardware sample rate: "
           << device_info_.device.input.sample_rate;
  media::AudioSampleRate asr;
  if (media::ToAudioSampleRate(device_info_.device.input.sample_rate, &asr)) {
    UMA_HISTOGRAM_ENUMERATION(
        "WebRTC.AudioInputSampleRate", asr, media::kAudioSampleRateMax + 1);
  } else {
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioInputSampleRateUnexpected",
                         device_info_.device.input.sample_rate);
  }

  // Verify that the reported input hardware sample rate is supported
  // on the current platform.
  if (std::find(&kValidInputRates[0],
                &kValidInputRates[0] + arraysize(kValidInputRates),
                device_info_.device.input.sample_rate) ==
          &kValidInputRates[arraysize(kValidInputRates)]) {
    DLOG(ERROR) << device_info_.device.input.sample_rate
                << " is not a supported input rate.";
    return false;
  }

  // Create and configure the default audio capturing source.
  SetCapturerSource(AudioDeviceFactory::NewInputDevice(render_view_id_),
                    channel_layout,
                    static_cast<float>(device_info_.device.input.sample_rate));

  // Add the capturer to the WebRtcAudioDeviceImpl since it needs some hardware
  // information from the capturer.
  if (audio_device_)
    audio_device_->AddAudioCapturer(this);

  return true;
}

WebRtcAudioCapturer::WebRtcAudioCapturer(
    int render_view_id,
    const StreamDeviceInfo& device_info,
    const blink::WebMediaConstraints& constraints,
    WebRtcAudioDeviceImpl* audio_device,
    MediaStreamAudioSource* audio_source)
    : constraints_(constraints),
      audio_processor_(
          new talk_base::RefCountedObject<MediaStreamAudioProcessor>(
              constraints, device_info.device.input.effects, audio_device)),
      running_(false),
      render_view_id_(render_view_id),
      device_info_(device_info),
      volume_(0),
      peer_connection_mode_(false),
      key_pressed_(false),
      need_audio_processing_(false),
      audio_device_(audio_device),
      audio_source_(audio_source),
      audio_power_monitor_(
          device_info_.device.input.sample_rate,
          base::TimeDelta::FromMilliseconds(kPowerMonitorTimeConstantMs)) {
  DVLOG(1) << "WebRtcAudioCapturer::WebRtcAudioCapturer()";
}

WebRtcAudioCapturer::~WebRtcAudioCapturer() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK(tracks_.IsEmpty());
  DVLOG(1) << "WebRtcAudioCapturer::~WebRtcAudioCapturer()";
  Stop();
}

void WebRtcAudioCapturer::AddTrack(WebRtcLocalAudioTrack* track) {
  DCHECK(track);
  DVLOG(1) << "WebRtcAudioCapturer::AddTrack()";

  {
    base::AutoLock auto_lock(lock_);
    // Verify that |track| is not already added to the list.
    DCHECK(!tracks_.Contains(TrackOwner::TrackWrapper(track)));

    // Add with a tag, so we remember to call OnSetFormat() on the new
    // track.
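    // The tag is consumed on the capture thread in Capture(), which calls
    // OnSetFormat() and SetAudioProcessor() on each newly tagged track.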
    scoped_refptr<TrackOwner> track_owner(new TrackOwner(track));
    tracks_.AddAndTag(track_owner);
  }
}

void WebRtcAudioCapturer::RemoveTrack(WebRtcLocalAudioTrack* track) {
  DCHECK(thread_checker_.CalledOnValidThread());
  DVLOG(1) << "WebRtcAudioCapturer::RemoveTrack()";
  bool stop_source = false;
  {
    base::AutoLock auto_lock(lock_);

    scoped_refptr<TrackOwner> removed_item =
        tracks_.Remove(TrackOwner::TrackWrapper(track));

    // Clear the delegate to ensure that no more capture callbacks will
    // be sent to this sink. Also avoids a possible crash which can happen
    // if this method is called while capturing is active.
    if (removed_item.get()) {
      removed_item->Reset();
      stop_source = tracks_.IsEmpty();
    }
  }
  if (stop_source) {
    // Since WebRtcAudioCapturer does not inherit from MediaStreamAudioSource,
    // and instead MediaStreamAudioSource is composed of a WebRtcAudioCapturer,
    // we have to call StopSource() on the MediaStreamSource. This calls
    // MediaStreamAudioSource::DoStopSource(), which in turn calls
    // WebRtcAudioCapturer::Stop().
    audio_source_->StopSource();
  }
}

void WebRtcAudioCapturer::SetCapturerSource(
    const scoped_refptr<media::AudioCapturerSource>& source,
    media::ChannelLayout channel_layout,
    float sample_rate) {
  DCHECK(thread_checker_.CalledOnValidThread());
  DVLOG(1) << "SetCapturerSource(channel_layout=" << channel_layout << ","
           << "sample_rate=" << sample_rate << ")";
  scoped_refptr<media::AudioCapturerSource> old_source;
  {
    base::AutoLock auto_lock(lock_);
    if (source_.get() == source.get())
      return;

    source_.swap(old_source);
    source_ = source;

    // Reset the flag to allow starting the new source.
    running_ = false;
  }

  DVLOG(1) << "Switching to a new capture source.";
  if (old_source.get())
    old_source->Stop();

  // Dispatch the new parameters both to the sink(s) and to the new source,
  // and also apply the new |constraints|. The idea is to remove any dependency
  // on the microphone parameters which would normally be used by default.
  // bits_per_sample is always 16 for now.
  int buffer_size = GetBufferSize(sample_rate);
  media::AudioParameters params(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                                channel_layout, 0, sample_rate,
                                16, buffer_size,
                                device_info_.device.input.effects);

  {
    base::AutoLock auto_lock(lock_);
    // Notify the |audio_processor_| of the new format.
    audio_processor_->OnCaptureFormatChanged(params);

    MediaAudioConstraints audio_constraints(constraints_,
                                            device_info_.device.input.effects);
    need_audio_processing_ = audio_constraints.NeedsAudioProcessing();
    // Notify all tracks about the new format.
    tracks_.TagAll();
  }

  if (source.get())
    source->Initialize(params, this, session_id());

  Start();
}

void WebRtcAudioCapturer::EnablePeerConnectionMode() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DVLOG(1) << "EnablePeerConnectionMode";
  // Do nothing if peer connection mode has already been enabled.
  if (peer_connection_mode_)
    return;

  peer_connection_mode_ = true;
  int render_view_id = -1;
  media::AudioParameters input_params;
  {
    base::AutoLock auto_lock(lock_);
    // Simply return if there is no existing source or the |render_view_id_| is
    // not valid.
    if (!source_.get() || render_view_id_ == -1)
      return;

    render_view_id = render_view_id_;
    input_params = audio_processor_->InputFormat();
  }

  // Do nothing if the current buffer size is already the WebRtc native buffer
  // size.
  if (GetBufferSize(input_params.sample_rate()) ==
          input_params.frames_per_buffer()) {
    return;
  }

  // Create a new audio stream as source which will open the hardware using
  // the WebRtc native buffer size.
  SetCapturerSource(AudioDeviceFactory::NewInputDevice(render_view_id),
                    input_params.channel_layout(),
                    static_cast<float>(input_params.sample_rate()));
}

void WebRtcAudioCapturer::Start() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DVLOG(1) << "WebRtcAudioCapturer::Start()";
  base::AutoLock auto_lock(lock_);
  if (running_ || !source_)
    return;

  // Start the data source, i.e., start capturing data from the current source.
  // We need to set the AGC control before starting the stream.
  source_->SetAutomaticGainControl(true);
  source_->Start();
  running_ = true;
}

void WebRtcAudioCapturer::Stop() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DVLOG(1) << "WebRtcAudioCapturer::Stop()";
  scoped_refptr<media::AudioCapturerSource> source;
  TrackList::ItemList tracks;
  {
    base::AutoLock auto_lock(lock_);
    if (!running_)
      return;

    source = source_;
    tracks = tracks_.Items();
    tracks_.Clear();
    running_ = false;
  }

  // Remove the capturer object from the WebRtcAudioDeviceImpl.
  if (audio_device_)
    audio_device_->RemoveAudioCapturer(this);

  for (TrackList::ItemList::const_iterator it = tracks.begin();
       it != tracks.end();
       ++it) {
    (*it)->Stop();
  }

  if (source.get())
    source->Stop();

  // Stop the audio processor to avoid feeding render data into the processor.
  audio_processor_->Stop();
}

void WebRtcAudioCapturer::SetVolume(int volume) {
  DVLOG(1) << "WebRtcAudioCapturer::SetVolume()";
  DCHECK_LE(volume, MaxVolume());
  double normalized_volume = static_cast<double>(volume) / MaxVolume();
  base::AutoLock auto_lock(lock_);
  if (source_.get())
    source_->SetVolume(normalized_volume);
}

int WebRtcAudioCapturer::Volume() const {
  base::AutoLock auto_lock(lock_);
  return volume_;
}

int WebRtcAudioCapturer::MaxVolume() const {
  return WebRtcAudioDeviceImpl::kMaxVolumeLevel;
}

void WebRtcAudioCapturer::Capture(const media::AudioBus* audio_source,
                                  int audio_delay_milliseconds,
                                  double volume,
                                  bool key_pressed) {
// This callback is driven by AudioInputDevice::AudioThreadCallback if
// |source_| is an AudioInputDevice; otherwise it is driven by the client's
// CaptureCallback.
#if defined(OS_WIN) || defined(OS_MACOSX)
  DCHECK_LE(volume, 1.0);
#elif (defined(OS_LINUX) && !defined(OS_CHROMEOS)) || defined(OS_OPENBSD)
  // We have a special situation on Linux where the microphone volume can be
  // "higher than maximum". The input volume slider in the sound preferences
  // allows the user to set a scaling that is higher than 100%. It means that
  // even if the reported maximum level is N, the actual microphone level can
  // go up to 1.5 * N, which corresponds to a normalized |volume| of 1.5.
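  // Hence the DCHECK below allows a small amount of headroom above 1.5.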
  DCHECK_LE(volume, 1.6);
#endif

  TrackList::ItemList tracks;
  TrackList::ItemList tracks_to_notify_format;
  int current_volume = 0;
  base::TimeDelta audio_delay;
  bool need_audio_processing = true;
  {
    base::AutoLock auto_lock(lock_);
    if (!running_)
      return;

    // Map the internal volume range of [0.0, 1.0] into [0, 255] used by AGC.
    // The volume can be higher than 255 on Linux, and it will be clamped to
    // 255 since AGC does not allow values out of range.
    volume_ = static_cast<int>((volume * MaxVolume()) + 0.5);
    current_volume = volume_ > MaxVolume() ? MaxVolume() : volume_;
    audio_delay = base::TimeDelta::FromMilliseconds(audio_delay_milliseconds);
    audio_delay_ = audio_delay;
    key_pressed_ = key_pressed;
    tracks = tracks_.Items();
    tracks_.RetrieveAndClearTags(&tracks_to_notify_format);

    // Set the flag to turn on the audio processing at the PeerConnection
    // level. Note that we turn off the audio processing in PeerConnection if
    // the processor has already processed the data.
    need_audio_processing = need_audio_processing_ ?
        !MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() : false;
  }

  DCHECK(audio_processor_->InputFormat().IsValid());
  DCHECK_EQ(audio_source->channels(),
            audio_processor_->InputFormat().channels());
  DCHECK_EQ(audio_source->frames(),
            audio_processor_->InputFormat().frames_per_buffer());

  // Notify the tracks when the format changes. This will do nothing if
  // |tracks_to_notify_format| is empty.
  media::AudioParameters output_params = audio_processor_->OutputFormat();
  for (TrackList::ItemList::const_iterator it = tracks_to_notify_format.begin();
       it != tracks_to_notify_format.end(); ++it) {
    (*it)->OnSetFormat(output_params);
    (*it)->SetAudioProcessor(audio_processor_);
  }

  if ((base::TimeTicks::Now() - last_audio_level_log_time_).InSeconds() >
          kPowerMonitorLogIntervalSeconds) {
    audio_power_monitor_.Scan(*audio_source, audio_source->frames());

    last_audio_level_log_time_ = base::TimeTicks::Now();

    std::pair<float, bool> result =
        audio_power_monitor_.ReadCurrentPowerAndClip();
    WebRtcLogMessage(base::StringPrintf(
        "WAC::Capture: current_audio_power=%.2fdBFS.", result.first));

    audio_power_monitor_.Reset();
  }

  // Push the data to the processor for processing.
  audio_processor_->PushCaptureData(audio_source);

  // Process and consume the data in the processor until there is not enough
  // data left in the processor.
  int16* output = NULL;
  int new_volume = 0;
  while (audio_processor_->ProcessAndConsumeData(
             audio_delay, current_volume, key_pressed, &new_volume, &output)) {
    // Feed the post-processed data to the tracks.
    for (TrackList::ItemList::const_iterator it = tracks.begin();
         it != tracks.end(); ++it) {
      (*it)->Capture(output, audio_delay, current_volume, key_pressed,
                     need_audio_processing);
    }

    if (new_volume) {
      SetVolume(new_volume);

      // Update |current_volume| to avoid passing the old volume to AGC.
      current_volume = new_volume;
    }
  }
}

void WebRtcAudioCapturer::OnCaptureError() {
  NOTIMPLEMENTED();
}

media::AudioParameters WebRtcAudioCapturer::source_audio_parameters() const {
  base::AutoLock auto_lock(lock_);
  return audio_processor_ ?
      audio_processor_->InputFormat() : media::AudioParameters();
}

bool WebRtcAudioCapturer::GetPairedOutputParameters(
    int* session_id,
    int* output_sample_rate,
    int* output_frames_per_buffer) const {
  // Don't set output parameters unless all of them are valid.
  if (device_info_.session_id <= 0 ||
      !device_info_.device.matched_output.sample_rate ||
      !device_info_.device.matched_output.frames_per_buffer)
    return false;

  *session_id = device_info_.session_id;
  *output_sample_rate = device_info_.device.matched_output.sample_rate;
  *output_frames_per_buffer =
      device_info_.device.matched_output.frames_per_buffer;

  return true;
}

int WebRtcAudioCapturer::GetBufferSize(int sample_rate) const {
  DCHECK(thread_checker_.CalledOnValidThread());
#if defined(OS_ANDROID)
  // TODO(henrika): Tune and adjust buffer size on Android.
  return (2 * sample_rate / 100);
#endif

  // PeerConnection processes audio in 10 ms chunks, so a buffer size that is
  // a multiple of 10 ms gives the best performance with PeerConnection.
  int peer_connection_buffer_size = sample_rate / 100;

  // Use the native hardware buffer size in non-peer-connection mode when the
  // platform uses a native buffer size smaller than the PeerConnection buffer
  // size.
  int hardware_buffer_size = device_info_.device.input.frames_per_buffer;
  if (!peer_connection_mode_ && hardware_buffer_size &&
      hardware_buffer_size <= peer_connection_buffer_size) {
    return hardware_buffer_size;
  }

  return peer_connection_buffer_size;
}

void WebRtcAudioCapturer::GetAudioProcessingParams(
    base::TimeDelta* delay, int* volume, bool* key_pressed) {
  base::AutoLock auto_lock(lock_);
  *delay = audio_delay_;
  *volume = volume_;
  *key_pressed = key_pressed_;
}

void WebRtcAudioCapturer::SetCapturerSourceForTesting(
    const scoped_refptr<media::AudioCapturerSource>& source,
    media::AudioParameters params) {
  // Create a new audio stream as source which uses the new source.
  SetCapturerSource(source, params.channel_layout(),
                    static_cast<float>(params.sample_rate()));
}

}  // namespace content