media_stream_audio_processor.cc revision a02191e04bc25c4935f804f2c080ae28663d096d
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/media_stream_audio_processor.h"

#include "base/command_line.h"
#include "base/debug/trace_event.h"
#include "base/metrics/field_trial.h"
#include "base/metrics/histogram.h"
#include "content/public/common/content_switches.h"
#include "content/renderer/media/media_stream_audio_processor_options.h"
#include "content/renderer/media/rtc_media_constraints.h"
#include "content/renderer/media/webrtc_audio_device_impl.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_converter.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_layout.h"
#include "third_party/WebKit/public/platform/WebMediaConstraints.h"
#include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
#include "third_party/webrtc/modules/audio_processing/typing_detection.h"

namespace content {

namespace {

using webrtc::AudioProcessing;
using webrtc::MediaConstraintsInterface;

#if defined(OS_ANDROID)
const int kAudioProcessingSampleRate = 16000;
#else
const int kAudioProcessingSampleRate = 32000;
#endif
const int kAudioProcessingNumberOfChannels = 1;

const int kMaxNumberOfBuffersInFifo = 2;

// Used by UMA histograms; entries shouldn't be re-ordered or removed.
enum AudioTrackProcessingStates {
  AUDIO_PROCESSING_ENABLED = 0,
  AUDIO_PROCESSING_DISABLED,
  AUDIO_PROCESSING_IN_WEBRTC,
  AUDIO_PROCESSING_MAX
};

void RecordProcessingState(AudioTrackProcessingStates state) {
  UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
                            state, AUDIO_PROCESSING_MAX);
}

}  // namespace

class MediaStreamAudioProcessor::MediaStreamAudioConverter
    : public media::AudioConverter::InputCallback {
 public:
  MediaStreamAudioConverter(const media::AudioParameters& source_params,
                            const media::AudioParameters& sink_params)
      : source_params_(source_params),
        sink_params_(sink_params),
        audio_converter_(source_params, sink_params_, false) {
    // An instance of MediaStreamAudioConverter may be created in the main
    // render thread and used in the audio thread, for example, the
    // |MediaStreamAudioProcessor::capture_converter_|.
    thread_checker_.DetachFromThread();
    audio_converter_.AddInput(this);
    // Create and initialize the audio FIFO and audio bus wrapper.
    // The size of the FIFO should be at least twice the source buffer size
    // or twice the sink buffer size.
    int buffer_size = std::max(
        kMaxNumberOfBuffersInFifo * source_params_.frames_per_buffer(),
        kMaxNumberOfBuffersInFifo * sink_params_.frames_per_buffer());
    fifo_.reset(new media::AudioFifo(source_params_.channels(), buffer_size));
    // TODO(xians): Use CreateWrapper to save one memcpy.
    audio_wrapper_ = media::AudioBus::Create(sink_params_.channels(),
                                             sink_params_.frames_per_buffer());
  }

  virtual ~MediaStreamAudioConverter() {
    audio_converter_.RemoveInput(this);
  }

  void Push(media::AudioBus* audio_source) {
    // Called on the audio thread, which is the capture audio thread for
    // |MediaStreamAudioProcessor::capture_converter_| and the render audio
    // thread for |MediaStreamAudioProcessor::render_converter_|. It must be
    // the same thread that calls Convert().
    DCHECK(thread_checker_.CalledOnValidThread());
    fifo_->Push(audio_source);
  }

  bool Convert(webrtc::AudioFrame* out) {
    // Called on the audio thread, which is the capture audio thread for
    // |MediaStreamAudioProcessor::capture_converter_| and the render audio
    // thread for |MediaStreamAudioProcessor::render_converter_|.
    DCHECK(thread_checker_.CalledOnValidThread());
    // Return false if there is not enough data in the FIFO; this happens when
    // fifo_->frames() / source_params_.sample_rate() is less than
    // sink_params_.frames_per_buffer() / sink_params_.sample_rate().
    if (fifo_->frames() * sink_params_.sample_rate() <
        sink_params_.frames_per_buffer() * source_params_.sample_rate()) {
      return false;
    }

    // Convert data to the output format; this will trigger ProvideInput().
    audio_converter_.Convert(audio_wrapper_.get());

    // TODO(xians): Figure out a better way to handle the interleaved and
    // deinterleaved format switching.
    DCHECK_EQ(audio_wrapper_->frames(), sink_params_.frames_per_buffer());
    audio_wrapper_->ToInterleaved(audio_wrapper_->frames(),
                                  sink_params_.bits_per_sample() / 8,
                                  out->data_);

    out->samples_per_channel_ = sink_params_.frames_per_buffer();
    out->sample_rate_hz_ = sink_params_.sample_rate();
    out->speech_type_ = webrtc::AudioFrame::kNormalSpeech;
    out->vad_activity_ = webrtc::AudioFrame::kVadUnknown;
    out->num_channels_ = sink_params_.channels();

    return true;
  }

  const media::AudioParameters& source_parameters() const {
    return source_params_;
  }
  const media::AudioParameters& sink_parameters() const {
    return sink_params_;
  }

 private:
  // AudioConverter::InputCallback implementation.
  virtual double ProvideInput(media::AudioBus* audio_bus,
                              base::TimeDelta buffer_delay) OVERRIDE {
    // Called on the realtime audio thread.
    // TODO(xians): Figure out why the first Convert() triggers ProvideInput()
    // two times.
    if (fifo_->frames() < audio_bus->frames())
      return 0;

    fifo_->Consume(audio_bus, 0, audio_bus->frames());

    // Return 1.0 to indicate no volume scaling on the data.
    return 1.0;
  }

  base::ThreadChecker thread_checker_;
  const media::AudioParameters source_params_;
  const media::AudioParameters sink_params_;

  // TODO(xians): Consider using SincResampler to save some memcpy.
  // Handles mixing and resampling between input and output parameters.
  media::AudioConverter audio_converter_;
  scoped_ptr<media::AudioBus> audio_wrapper_;
  scoped_ptr<media::AudioFifo> fifo_;
};

MediaStreamAudioProcessor::MediaStreamAudioProcessor(
    const blink::WebMediaConstraints& constraints,
    int effects,
    MediaStreamType type,
    WebRtcPlayoutDataSource* playout_data_source)
    : render_delay_ms_(0),
      playout_data_source_(playout_data_source),
      audio_mirroring_(false),
      typing_detected_(false) {
  capture_thread_checker_.DetachFromThread();
  render_thread_checker_.DetachFromThread();
  InitializeAudioProcessingModule(constraints, effects, type);
}

MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  StopAudioProcessing();
}

void MediaStreamAudioProcessor::OnCaptureFormatChanged(
    const media::AudioParameters& source_params) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more PushCaptureData() and ProcessAndConsumeData() callbacks
  // on the capture thread.
  InitializeCaptureConverter(source_params);

  // Reset the |capture_thread_checker_| since the capture data will come from
  // a new capture thread.
  capture_thread_checker_.DetachFromThread();
}

void MediaStreamAudioProcessor::PushCaptureData(media::AudioBus* audio_source) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  DCHECK_EQ(audio_source->channels(),
            capture_converter_->source_parameters().channels());
  DCHECK_EQ(audio_source->frames(),
            capture_converter_->source_parameters().frames_per_buffer());

  if (audio_mirroring_ &&
      capture_converter_->source_parameters().channel_layout() ==
          media::CHANNEL_LAYOUT_STEREO) {
    // Swap the first and second channels.
    audio_source->SwapChannels(0, 1);
  }

  capture_converter_->Push(audio_source);
}

bool MediaStreamAudioProcessor::ProcessAndConsumeData(
    base::TimeDelta capture_delay, int volume, bool key_pressed,
    int* new_volume, int16** out) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");

  if (!capture_converter_->Convert(&capture_frame_))
    return false;

  *new_volume = ProcessData(&capture_frame_, capture_delay, volume,
                            key_pressed);
  *out = capture_frame_.data_;

  return true;
}

const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
  return capture_converter_->source_parameters();
}

const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
  return capture_converter_->sink_parameters();
}

void MediaStreamAudioProcessor::StartAecDump(
    const base::PlatformFile& aec_dump_file) {
  if (audio_processing_)
    StartEchoCancellationDump(audio_processing_.get(), aec_dump_file);
}

void MediaStreamAudioProcessor::StopAecDump() {
  if (audio_processing_)
    StopEchoCancellationDump(audio_processing_.get());
}

void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
                                              int sample_rate,
                                              int audio_delay_milliseconds) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
#if defined(OS_ANDROID) || defined(OS_IOS)
  DCHECK(audio_processing_->echo_control_mobile()->is_enabled());
#else
  DCHECK(audio_processing_->echo_cancellation()->is_enabled());
#endif

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
  DCHECK_LT(audio_delay_milliseconds,
            std::numeric_limits<base::subtle::Atomic32>::max());
  base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);

  InitializeRenderConverterIfNeeded(sample_rate, audio_bus->channels(),
                                    audio_bus->frames());

  render_converter_->Push(audio_bus);
  while (render_converter_->Convert(&render_frame_))
    audio_processing_->AnalyzeReverseStream(&render_frame_);
}

void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more OnPlayoutData() callbacks on the render thread.
  render_thread_checker_.DetachFromThread();
  render_converter_.reset();
}

void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
  stats->typing_noise_detected =
      (base::subtle::Acquire_Load(&typing_detected_) != false);
  GetAecStats(audio_processing_.get(), stats);
}

void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
    const blink::WebMediaConstraints& constraints, int effects,
    MediaStreamType type) {
  DCHECK(!audio_processing_);

  RTCMediaConstraints native_constraints(constraints);

  // Audio mirroring can be enabled even though audio processing is otherwise
  // disabled.
  audio_mirroring_ = GetPropertyFromConstraints(
      &native_constraints, webrtc::MediaConstraintsInterface::kAudioMirroring);

  if (!IsAudioTrackProcessingEnabled()) {
    RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
    return;
  }

  // Only apply the fixed constraints for gUM of MEDIA_DEVICE_AUDIO_CAPTURE.
  DCHECK(IsAudioMediaType(type));
  if (type == MEDIA_DEVICE_AUDIO_CAPTURE)
    ApplyFixedAudioConstraints(&native_constraints);

  if (effects & media::AudioParameters::ECHO_CANCELLER) {
    // If platform echo canceller is enabled, disable the software AEC.
    native_constraints.AddMandatory(
        MediaConstraintsInterface::kEchoCancellation,
        MediaConstraintsInterface::kValueFalse, true);
  }

#if defined(OS_IOS)
  // On iOS, VPIO provides built-in AEC and AGC.
  const bool enable_aec = false;
  const bool enable_agc = false;
#else
  const bool enable_aec = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kEchoCancellation);
  const bool enable_agc = GetPropertyFromConstraints(
      &native_constraints, webrtc::MediaConstraintsInterface::kAutoGainControl);
#endif

#if defined(OS_IOS) || defined(OS_ANDROID)
  const bool enable_experimental_aec = false;
  const bool enable_typing_detection = false;
#else
  const bool enable_experimental_aec = GetPropertyFromConstraints(
      &native_constraints,
      MediaConstraintsInterface::kExperimentalEchoCancellation);
  const bool enable_typing_detection = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kTypingNoiseDetection);
#endif

  const bool enable_ns = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kNoiseSuppression);
  const bool enable_experimental_ns = GetPropertyFromConstraints(
      &native_constraints,
      MediaConstraintsInterface::kExperimentalNoiseSuppression);
  const bool enable_high_pass_filter = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kHighpassFilter);

  // Return immediately if no audio processing component is enabled.
  if (!enable_aec && !enable_experimental_aec && !enable_ns &&
      !enable_high_pass_filter && !enable_typing_detection && !enable_agc &&
      !enable_experimental_ns) {
    RecordProcessingState(AUDIO_PROCESSING_DISABLED);
    return;
  }

  // Create and configure the webrtc::AudioProcessing.
  audio_processing_.reset(webrtc::AudioProcessing::Create(0));

  // Enable the audio processing components.
  if (enable_aec) {
    EnableEchoCancellation(audio_processing_.get());
    if (enable_experimental_aec)
      EnableExperimentalEchoCancellation(audio_processing_.get());

    if (playout_data_source_)
      playout_data_source_->AddPlayoutSink(this);
  }

  if (enable_ns)
    EnableNoiseSuppression(audio_processing_.get());

  if (enable_experimental_ns)
    EnableExperimentalNoiseSuppression(audio_processing_.get());

  if (enable_high_pass_filter)
    EnableHighPassFilter(audio_processing_.get());

  if (enable_typing_detection) {
    // TODO(xians): Remove this |typing_detector_| after the typing suppression
    // is enabled by default.
    typing_detector_.reset(new webrtc::TypingDetection());
    EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
  }

  if (enable_agc)
    EnableAutomaticGainControl(audio_processing_.get());

  // Configure the audio format the audio processing is running on. This
  // has to be done after all the needed components are enabled.
  CHECK_EQ(0,
           audio_processing_->set_sample_rate_hz(kAudioProcessingSampleRate));
  CHECK_EQ(0, audio_processing_->set_num_channels(
      kAudioProcessingNumberOfChannels, kAudioProcessingNumberOfChannels));

  RecordProcessingState(AUDIO_PROCESSING_ENABLED);
}

void MediaStreamAudioProcessor::InitializeCaptureConverter(
    const media::AudioParameters& source_params) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  DCHECK(source_params.IsValid());

  // Create and initialize audio converter for the source data.
  // When the webrtc AudioProcessing is enabled, the sink format of the
  // converter will be the same as the post-processed data format, which is
  // 32k mono for desktops and 16k mono for Android. When the AudioProcessing
  // is disabled, the sink format will be the same as the source format.
  const int sink_sample_rate = audio_processing_ ?
      kAudioProcessingSampleRate : source_params.sample_rate();
  const media::ChannelLayout sink_channel_layout = audio_processing_ ?
      media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
      source_params.channel_layout();

  // WebRtc AudioProcessing requires 10ms as its packet size. We use this
  // native size when processing is enabled. When processing is disabled and
  // the source is running with a buffer size smaller than 10ms, we use the
  // same buffer size as the incoming format to avoid an extra FIFO for
  // WebAudio.
  int sink_buffer_size = sink_sample_rate / 100;
  if (!audio_processing_ &&
      source_params.frames_per_buffer() < sink_buffer_size) {
    sink_buffer_size = source_params.frames_per_buffer();
  }

  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY, sink_channel_layout,
      sink_sample_rate, 16, sink_buffer_size);
  capture_converter_.reset(
      new MediaStreamAudioConverter(source_params, sink_params));
}

void MediaStreamAudioProcessor::InitializeRenderConverterIfNeeded(
    int sample_rate, int number_of_channels, int frames_per_buffer) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
  // TODO(xians): Figure out if we need to handle the buffer size change.
  if (render_converter_.get() &&
      render_converter_->source_parameters().sample_rate() == sample_rate &&
      render_converter_->source_parameters().channels() == number_of_channels) {
    // Do nothing if the |render_converter_| has been set up properly.
    return;
  }

  // Create and initialize audio converter for the render data.
  // webrtc::AudioProcessing accepts the same format as what it uses to process
  // capture data, which is 32k mono for desktops and 16k mono for Android.
  media::AudioParameters source_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::GuessChannelLayout(number_of_channels), sample_rate, 16,
      frames_per_buffer);
  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::CHANNEL_LAYOUT_MONO, kAudioProcessingSampleRate, 16,
      kAudioProcessingSampleRate / 100);
  render_converter_.reset(
      new MediaStreamAudioConverter(source_params, sink_params));
  render_data_bus_ = media::AudioBus::Create(number_of_channels,
                                             frames_per_buffer);
}

int MediaStreamAudioProcessor::ProcessData(webrtc::AudioFrame* audio_frame,
                                           base::TimeDelta capture_delay,
                                           int volume,
                                           bool key_pressed) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  if (!audio_processing_)
    return 0;

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");
  DCHECK_EQ(audio_processing_->sample_rate_hz(),
            capture_converter_->sink_parameters().sample_rate());
  DCHECK_EQ(audio_processing_->num_input_channels(),
            capture_converter_->sink_parameters().channels());
  DCHECK_EQ(audio_processing_->num_output_channels(),
            capture_converter_->sink_parameters().channels());

  base::subtle::Atomic32 render_delay_ms =
      base::subtle::Acquire_Load(&render_delay_ms_);
  int64 capture_delay_ms = capture_delay.InMilliseconds();
  DCHECK_LT(capture_delay_ms,
            std::numeric_limits<base::subtle::Atomic32>::max());
  int total_delay_ms = capture_delay_ms + render_delay_ms;
  if (total_delay_ms > 300) {
    LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
                 << "ms; render delay: " << render_delay_ms << "ms";
  }

  audio_processing_->set_stream_delay_ms(total_delay_ms);

  DCHECK_LE(volume, WebRtcAudioDeviceImpl::kMaxVolumeLevel);
  webrtc::GainControl* agc = audio_processing_->gain_control();
  int err = agc->set_stream_analog_level(volume);
  DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;

  audio_processing_->set_stream_key_pressed(key_pressed);

  err = audio_processing_->ProcessStream(audio_frame);
  DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;

  if (typing_detector_ &&
      audio_frame->vad_activity_ != webrtc::AudioFrame::kVadUnknown) {
    bool vad_active =
        (audio_frame->vad_activity_ == webrtc::AudioFrame::kVadActive);
    bool typing_detected = typing_detector_->Process(key_pressed, vad_active);
    base::subtle::Release_Store(&typing_detected_, typing_detected);
  }

  // Return 0 if the volume has not been changed; otherwise return the new
  // volume.
  return (agc->stream_analog_level() == volume) ?
      0 : agc->stream_analog_level();
}

void MediaStreamAudioProcessor::StopAudioProcessing() {
  if (!audio_processing_.get())
    return;

  StopAecDump();

  if (playout_data_source_)
    playout_data_source_->RemovePlayoutSink(this);

  audio_processing_.reset();
}

bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() const {
  const std::string group_name =
      base::FieldTrialList::FindFullName("MediaStreamAudioTrackProcessing");
  return group_name == "Enabled" || CommandLine::ForCurrentProcess()->HasSwitch(
      switches::kEnableAudioTrackProcessing);
}

}  // namespace content
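
Note on the conversion gate in MediaStreamAudioConverter::Convert() above: the FIFO is only drained once it holds enough source-rate frames to produce one full sink buffer, and the comparison is written as a cross-multiplication so no division is needed. A minimal standalone restatement of that check follows; the function name and parameters are illustrative (not part of the file above), and the 64-bit intermediate is an added safety margin, not something the original code does.

#include <cstdint>

// Returns true once |fifo_frames| captured at |source_rate| cover at least one
// sink buffer of |sink_frames_per_buffer| frames at |sink_rate|, i.e.
// fifo_frames / source_rate >= sink_frames_per_buffer / sink_rate, mirroring
// the readiness check in MediaStreamAudioConverter::Convert().
bool HasEnoughFramesForOneSinkBuffer(int fifo_frames,
                                     int source_rate,
                                     int sink_frames_per_buffer,
                                     int sink_rate) {
  return static_cast<int64_t>(fifo_frames) * sink_rate >=
         static_cast<int64_t>(sink_frames_per_buffer) * source_rate;
}

For example, with a 44.1 kHz capture source feeding the 32 kHz/320-frame processing sink, the check passes once 441 source frames (10 ms at 44.1 kHz) have been pushed into the FIFO.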