1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/renderer/media/media_stream_audio_processor.h"
6
7#include "base/command_line.h"
8#include "base/debug/trace_event.h"
9#if defined(OS_MACOSX)
10#include "base/metrics/field_trial.h"
11#endif
12#include "base/metrics/histogram.h"
13#include "content/public/common/content_switches.h"
14#include "content/renderer/media/media_stream_audio_processor_options.h"
15#include "content/renderer/media/rtc_media_constraints.h"
16#include "content/renderer/media/webrtc_audio_device_impl.h"
17#include "media/audio/audio_parameters.h"
18#include "media/base/audio_converter.h"
19#include "media/base/audio_fifo.h"
20#include "media/base/channel_layout.h"
21#include "third_party/WebKit/public/platform/WebMediaConstraints.h"
22#include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
23#include "third_party/webrtc/modules/audio_processing/typing_detection.h"
24
25namespace content {
26
27namespace {
28
29using webrtc::AudioProcessing;
30
// Output sample rate, in Hz, used when audio processing is enabled. Android
// builds use a lower rate than every other platform.
#if !defined(OS_ANDROID)
const int kAudioProcessingSampleRate = 32000;
#else
const int kAudioProcessingSampleRate = 16000;
#endif
// Output channel count used when audio processing is enabled.
const int kAudioProcessingNumberOfChannels = 1;
37
38AudioProcessing::ChannelLayout MapLayout(media::ChannelLayout media_layout) {
39  switch (media_layout) {
40    case media::CHANNEL_LAYOUT_MONO:
41      return AudioProcessing::kMono;
42    case media::CHANNEL_LAYOUT_STEREO:
43      return AudioProcessing::kStereo;
44    case media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
45      return AudioProcessing::kStereoAndKeyboard;
46    default:
47      NOTREACHED() << "Layout not supported: " << media_layout;
48      return AudioProcessing::kMono;
49  }
50}
51
52AudioProcessing::ChannelLayout ChannelsToLayout(int num_channels) {
53  switch (num_channels) {
54    case 1:
55      return AudioProcessing::kMono;
56    case 2:
57      return AudioProcessing::kStereo;
58    default:
59      NOTREACHED() << "Channels not supported: " << num_channels;
60      return AudioProcessing::kMono;
61  }
62}
63
// Reported to UMA histograms, so existing entries must keep their numeric
// values: do not re-order or remove them.
enum AudioTrackProcessingStates {
  AUDIO_PROCESSING_ENABLED = 0,
  AUDIO_PROCESSING_DISABLED = 1,
  AUDIO_PROCESSING_IN_WEBRTC = 2,
  AUDIO_PROCESSING_MAX = 3
};
71
72void RecordProcessingState(AudioTrackProcessingStates state) {
73  UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
74                            state, AUDIO_PROCESSING_MAX);
75}
76
77}  // namespace
78
79// Wraps AudioBus to provide access to the array of channel pointers, since this
80// is the type webrtc::AudioProcessing deals in. The array is refreshed on every
81// channel_ptrs() call, and will be valid until the underlying AudioBus pointers
82// are changed, e.g. through calls to SetChannelData() or SwapChannels().
83//
84// All methods are called on one of the capture or render audio threads
85// exclusively.
86class MediaStreamAudioBus {
87 public:
88  MediaStreamAudioBus(int channels, int frames)
89      : bus_(media::AudioBus::Create(channels, frames)),
90        channel_ptrs_(new float*[channels]) {
91    // May be created in the main render thread and used in the audio threads.
92    thread_checker_.DetachFromThread();
93  }
94
95  media::AudioBus* bus() {
96    DCHECK(thread_checker_.CalledOnValidThread());
97    return bus_.get();
98  }
99
100  float* const* channel_ptrs() {
101    DCHECK(thread_checker_.CalledOnValidThread());
102    for (int i = 0; i < bus_->channels(); ++i) {
103      channel_ptrs_[i] = bus_->channel(i);
104    }
105    return channel_ptrs_.get();
106  }
107
108 private:
109  base::ThreadChecker thread_checker_;
110  scoped_ptr<media::AudioBus> bus_;
111  scoped_ptr<float*[]> channel_ptrs_;
112};
113
114// Wraps AudioFifo to provide a cleaner interface to MediaStreamAudioProcessor.
115// It avoids the FIFO when the source and destination frames match. All methods
116// are called on one of the capture or render audio threads exclusively.
117class MediaStreamAudioFifo {
118 public:
119  MediaStreamAudioFifo(int channels, int source_frames,
120                       int destination_frames)
121     : source_frames_(source_frames),
122       destination_(new MediaStreamAudioBus(channels, destination_frames)),
123       data_available_(false) {
124    if (source_frames != destination_frames) {
125      // Since we require every Push to be followed by as many Consumes as
126      // possible, twice the larger of the two is a (probably) loose upper bound
127      // on the FIFO size.
128      const int fifo_frames = 2 * std::max(source_frames, destination_frames);
129      fifo_.reset(new media::AudioFifo(channels, fifo_frames));
130    }
131
132    // May be created in the main render thread and used in the audio threads.
133    thread_checker_.DetachFromThread();
134  }
135
136  void Push(const media::AudioBus* source) {
137    DCHECK(thread_checker_.CalledOnValidThread());
138    DCHECK_EQ(source->channels(), destination_->bus()->channels());
139    DCHECK_EQ(source->frames(), source_frames_);
140
141    if (fifo_) {
142      fifo_->Push(source);
143    } else {
144      source->CopyTo(destination_->bus());
145      data_available_ = true;
146    }
147  }
148
149  // Returns true if there are destination_frames() of data available to be
150  // consumed, and otherwise false.
151  bool Consume(MediaStreamAudioBus** destination) {
152    DCHECK(thread_checker_.CalledOnValidThread());
153
154    if (fifo_) {
155      if (fifo_->frames() < destination_->bus()->frames())
156        return false;
157
158      fifo_->Consume(destination_->bus(), 0, destination_->bus()->frames());
159    } else {
160      if (!data_available_)
161        return false;
162
163      // The data was already copied to |destination_| in this case.
164      data_available_ = false;
165    }
166
167    *destination = destination_.get();
168    return true;
169  }
170
171 private:
172  base::ThreadChecker thread_checker_;
173  const int source_frames_;  // For a DCHECK.
174  scoped_ptr<MediaStreamAudioBus> destination_;
175  scoped_ptr<media::AudioFifo> fifo_;
176  // Only used when the FIFO is disabled;
177  bool data_available_;
178};
179
180bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() {
181  return !CommandLine::ForCurrentProcess()->HasSwitch(
182      switches::kDisableAudioTrackProcessing);
183}
184
185MediaStreamAudioProcessor::MediaStreamAudioProcessor(
186    const blink::WebMediaConstraints& constraints,
187    int effects,
188    WebRtcPlayoutDataSource* playout_data_source)
189    : render_delay_ms_(0),
190      playout_data_source_(playout_data_source),
191      audio_mirroring_(false),
192      typing_detected_(false),
193      stopped_(false) {
194  capture_thread_checker_.DetachFromThread();
195  render_thread_checker_.DetachFromThread();
196  InitializeAudioProcessingModule(constraints, effects);
197  if (IsAudioTrackProcessingEnabled()) {
198    aec_dump_message_filter_ = AecDumpMessageFilter::Get();
199    // In unit tests not creating a message filter, |aec_dump_message_filter_|
200    // will be NULL. We can just ignore that. Other unit tests and browser tests
201    // ensure that we do get the filter when we should.
202    if (aec_dump_message_filter_.get())
203      aec_dump_message_filter_->AddDelegate(this);
204  }
205}
206
207MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
208  DCHECK(main_thread_checker_.CalledOnValidThread());
209  Stop();
210}
211
212void MediaStreamAudioProcessor::OnCaptureFormatChanged(
213    const media::AudioParameters& input_format) {
214  DCHECK(main_thread_checker_.CalledOnValidThread());
215  // There is no need to hold a lock here since the caller guarantees that
216  // there is no more PushCaptureData() and ProcessAndConsumeData() callbacks
217  // on the capture thread.
218  InitializeCaptureFifo(input_format);
219
220  // Reset the |capture_thread_checker_| since the capture data will come from
221  // a new capture thread.
222  capture_thread_checker_.DetachFromThread();
223}
224
225void MediaStreamAudioProcessor::PushCaptureData(
226    const media::AudioBus* audio_source) {
227  DCHECK(capture_thread_checker_.CalledOnValidThread());
228
229  capture_fifo_->Push(audio_source);
230}
231
232bool MediaStreamAudioProcessor::ProcessAndConsumeData(
233    base::TimeDelta capture_delay, int volume, bool key_pressed,
234    int* new_volume, int16** out) {
235  DCHECK(capture_thread_checker_.CalledOnValidThread());
236  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");
237
238  MediaStreamAudioBus* process_bus;
239  if (!capture_fifo_->Consume(&process_bus))
240    return false;
241
242  // Use the process bus directly if audio processing is disabled.
243  MediaStreamAudioBus* output_bus = process_bus;
244  *new_volume = 0;
245  if (audio_processing_) {
246    output_bus = output_bus_.get();
247    *new_volume = ProcessData(process_bus->channel_ptrs(),
248                              process_bus->bus()->frames(), capture_delay,
249                              volume, key_pressed, output_bus->channel_ptrs());
250  }
251
252  // Swap channels before interleaving the data.
253  if (audio_mirroring_ &&
254      output_format_.channel_layout() == media::CHANNEL_LAYOUT_STEREO) {
255    // Swap the first and second channels.
256    output_bus->bus()->SwapChannels(0, 1);
257  }
258
259  output_bus->bus()->ToInterleaved(output_bus->bus()->frames(),
260                                   sizeof(int16),
261                                   output_data_.get());
262  *out = output_data_.get();
263
264  return true;
265}
266
267void MediaStreamAudioProcessor::Stop() {
268  DCHECK(main_thread_checker_.CalledOnValidThread());
269  if (stopped_)
270    return;
271
272  stopped_ = true;
273
274  if (aec_dump_message_filter_.get()) {
275    aec_dump_message_filter_->RemoveDelegate(this);
276    aec_dump_message_filter_ = NULL;
277  }
278
279  if (!audio_processing_.get())
280    return;
281
282  StopEchoCancellationDump(audio_processing_.get());
283
284  if (playout_data_source_) {
285    playout_data_source_->RemovePlayoutSink(this);
286    playout_data_source_ = NULL;
287  }
288}
289
290const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
291  return input_format_;
292}
293
294const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
295  return output_format_;
296}
297
298void MediaStreamAudioProcessor::OnAecDumpFile(
299    const IPC::PlatformFileForTransit& file_handle) {
300  DCHECK(main_thread_checker_.CalledOnValidThread());
301
302  base::File file = IPC::PlatformFileForTransitToFile(file_handle);
303  DCHECK(file.IsValid());
304
305  if (audio_processing_)
306    StartEchoCancellationDump(audio_processing_.get(), file.Pass());
307  else
308    file.Close();
309}
310
311void MediaStreamAudioProcessor::OnDisableAecDump() {
312  DCHECK(main_thread_checker_.CalledOnValidThread());
313  if (audio_processing_)
314    StopEchoCancellationDump(audio_processing_.get());
315}
316
317void MediaStreamAudioProcessor::OnIpcClosing() {
318  DCHECK(main_thread_checker_.CalledOnValidThread());
319  aec_dump_message_filter_ = NULL;
320}
321
322void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
323                                              int sample_rate,
324                                              int audio_delay_milliseconds) {
325  DCHECK(render_thread_checker_.CalledOnValidThread());
326  DCHECK(audio_processing_->echo_control_mobile()->is_enabled() ^
327         audio_processing_->echo_cancellation()->is_enabled());
328
329  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
330  DCHECK_LT(audio_delay_milliseconds,
331            std::numeric_limits<base::subtle::Atomic32>::max());
332  base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);
333
334  InitializeRenderFifoIfNeeded(sample_rate, audio_bus->channels(),
335                               audio_bus->frames());
336
337  render_fifo_->Push(audio_bus);
338  MediaStreamAudioBus* analysis_bus;
339  while (render_fifo_->Consume(&analysis_bus)) {
340    audio_processing_->AnalyzeReverseStream(
341        analysis_bus->channel_ptrs(),
342        analysis_bus->bus()->frames(),
343        sample_rate,
344        ChannelsToLayout(audio_bus->channels()));
345  }
346}
347
348void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
349  DCHECK(main_thread_checker_.CalledOnValidThread());
350  // There is no need to hold a lock here since the caller guarantees that
351  // there is no more OnPlayoutData() callback on the render thread.
352  render_thread_checker_.DetachFromThread();
353  render_fifo_.reset();
354}
355
356void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
357  stats->typing_noise_detected =
358      (base::subtle::Acquire_Load(&typing_detected_) != false);
359  GetAecStats(audio_processing_.get(), stats);
360}
361
362void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
363    const blink::WebMediaConstraints& constraints, int effects) {
364  DCHECK(!audio_processing_);
365
366  MediaAudioConstraints audio_constraints(constraints, effects);
367
368  // Audio mirroring can be enabled even though audio processing is otherwise
369  // disabled.
370  audio_mirroring_ = audio_constraints.GetProperty(
371      MediaAudioConstraints::kGoogAudioMirroring);
372
373  if (!IsAudioTrackProcessingEnabled()) {
374    RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
375    return;
376  }
377
378#if defined(OS_IOS)
379  // On iOS, VPIO provides built-in AGC and AEC.
380  const bool echo_cancellation = false;
381  const bool goog_agc = false;
382#else
383  const bool echo_cancellation =
384      audio_constraints.GetEchoCancellationProperty();
385  const bool goog_agc = audio_constraints.GetProperty(
386      MediaAudioConstraints::kGoogAutoGainControl);
387#endif
388
389#if defined(OS_IOS) || defined(OS_ANDROID)
390  const bool goog_experimental_aec = false;
391  const bool goog_typing_detection = false;
392#else
393  const bool goog_experimental_aec = audio_constraints.GetProperty(
394      MediaAudioConstraints::kGoogExperimentalEchoCancellation);
395  const bool goog_typing_detection = audio_constraints.GetProperty(
396      MediaAudioConstraints::kGoogTypingNoiseDetection);
397#endif
398
399  const bool goog_ns = audio_constraints.GetProperty(
400      MediaAudioConstraints::kGoogNoiseSuppression);
401  const bool goog_experimental_ns = audio_constraints.GetProperty(
402      MediaAudioConstraints::kGoogExperimentalNoiseSuppression);
403 const bool goog_high_pass_filter = audio_constraints.GetProperty(
404     MediaAudioConstraints::kGoogHighpassFilter);
405
406  // Return immediately if no goog constraint is enabled.
407  if (!echo_cancellation && !goog_experimental_aec && !goog_ns &&
408      !goog_high_pass_filter && !goog_typing_detection &&
409      !goog_agc && !goog_experimental_ns) {
410    RecordProcessingState(AUDIO_PROCESSING_DISABLED);
411    return;
412  }
413
414  // Experimental options provided at creation.
415  webrtc::Config config;
416  if (goog_experimental_aec)
417    config.Set<webrtc::DelayCorrection>(new webrtc::DelayCorrection(true));
418  if (goog_experimental_ns)
419    config.Set<webrtc::ExperimentalNs>(new webrtc::ExperimentalNs(true));
420#if defined(OS_MACOSX)
421  if (base::FieldTrialList::FindFullName("NoReportedDelayOnMac") == "Enabled")
422    config.Set<webrtc::ReportedDelay>(new webrtc::ReportedDelay(false));
423#endif
424
425  // Create and configure the webrtc::AudioProcessing.
426  audio_processing_.reset(webrtc::AudioProcessing::Create(config));
427
428  // Enable the audio processing components.
429  if (echo_cancellation) {
430    EnableEchoCancellation(audio_processing_.get());
431
432    if (playout_data_source_)
433      playout_data_source_->AddPlayoutSink(this);
434  }
435
436  if (goog_ns)
437    EnableNoiseSuppression(audio_processing_.get());
438
439  if (goog_high_pass_filter)
440    EnableHighPassFilter(audio_processing_.get());
441
442  if (goog_typing_detection) {
443    // TODO(xians): Remove this |typing_detector_| after the typing suppression
444    // is enabled by default.
445    typing_detector_.reset(new webrtc::TypingDetection());
446    EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
447  }
448
449  if (goog_agc)
450    EnableAutomaticGainControl(audio_processing_.get());
451
452  RecordProcessingState(AUDIO_PROCESSING_ENABLED);
453}
454
455void MediaStreamAudioProcessor::InitializeCaptureFifo(
456    const media::AudioParameters& input_format) {
457  DCHECK(main_thread_checker_.CalledOnValidThread());
458  DCHECK(input_format.IsValid());
459  input_format_ = input_format;
460
461  // TODO(ajm): For now, we assume fixed parameters for the output when audio
462  // processing is enabled, to match the previous behavior. We should either
463  // use the input parameters (in which case, audio processing will convert
464  // at output) or ideally, have a backchannel from the sink to know what
465  // format it would prefer.
466  const int output_sample_rate = audio_processing_ ?
467      kAudioProcessingSampleRate : input_format.sample_rate();
468  const media::ChannelLayout output_channel_layout = audio_processing_ ?
469      media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
470      input_format.channel_layout();
471
472  // webrtc::AudioProcessing requires a 10 ms chunk size. We use this native
473  // size when processing is enabled. When disabled we use the same size as
474  // the source if less than 10 ms.
475  //
476  // TODO(ajm): This conditional buffer size appears to be assuming knowledge of
477  // the sink based on the source parameters. PeerConnection sinks seem to want
478  // 10 ms chunks regardless, while WebAudio sinks want less, and we're assuming
479  // we can identify WebAudio sinks by the input chunk size. Less fragile would
480  // be to have the sink actually tell us how much it wants (as in the above
481  // TODO).
482  int processing_frames = input_format.sample_rate() / 100;
483  int output_frames = output_sample_rate / 100;
484  if (!audio_processing_ && input_format.frames_per_buffer() < output_frames) {
485    processing_frames = input_format.frames_per_buffer();
486    output_frames = processing_frames;
487  }
488
489  output_format_ = media::AudioParameters(
490      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
491      output_channel_layout,
492      output_sample_rate,
493      16,
494      output_frames);
495
496  capture_fifo_.reset(
497      new MediaStreamAudioFifo(input_format.channels(),
498                               input_format.frames_per_buffer(),
499                               processing_frames));
500
501  if (audio_processing_) {
502    output_bus_.reset(new MediaStreamAudioBus(output_format_.channels(),
503                                              output_frames));
504  }
505  output_data_.reset(new int16[output_format_.GetBytesPerBuffer() /
506                               sizeof(int16)]);
507}
508
509void MediaStreamAudioProcessor::InitializeRenderFifoIfNeeded(
510    int sample_rate, int number_of_channels, int frames_per_buffer) {
511  DCHECK(render_thread_checker_.CalledOnValidThread());
512  if (render_fifo_.get() &&
513      render_format_.sample_rate() == sample_rate &&
514      render_format_.channels() == number_of_channels &&
515      render_format_.frames_per_buffer() == frames_per_buffer) {
516    // Do nothing if the |render_fifo_| has been setup properly.
517    return;
518  }
519
520  render_format_ = media::AudioParameters(
521      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
522      media::GuessChannelLayout(number_of_channels),
523      sample_rate,
524      16,
525      frames_per_buffer);
526
527  const int analysis_frames = sample_rate / 100;  // 10 ms chunks.
528  render_fifo_.reset(
529      new MediaStreamAudioFifo(number_of_channels,
530                               frames_per_buffer,
531                               analysis_frames));
532}
533
534int MediaStreamAudioProcessor::ProcessData(const float* const* process_ptrs,
535                                           int process_frames,
536                                           base::TimeDelta capture_delay,
537                                           int volume,
538                                           bool key_pressed,
539                                           float* const* output_ptrs) {
540  DCHECK(audio_processing_);
541  DCHECK(capture_thread_checker_.CalledOnValidThread());
542
543  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");
544
545  base::subtle::Atomic32 render_delay_ms =
546      base::subtle::Acquire_Load(&render_delay_ms_);
547  int64 capture_delay_ms = capture_delay.InMilliseconds();
548  DCHECK_LT(capture_delay_ms,
549            std::numeric_limits<base::subtle::Atomic32>::max());
550  int total_delay_ms =  capture_delay_ms + render_delay_ms;
551  if (total_delay_ms > 300) {
552    LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
553                 << "ms; render delay: " << render_delay_ms << "ms";
554  }
555
556  webrtc::AudioProcessing* ap = audio_processing_.get();
557  ap->set_stream_delay_ms(total_delay_ms);
558
559  DCHECK_LE(volume, WebRtcAudioDeviceImpl::kMaxVolumeLevel);
560  webrtc::GainControl* agc = ap->gain_control();
561  int err = agc->set_stream_analog_level(volume);
562  DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;
563
564  ap->set_stream_key_pressed(key_pressed);
565
566  err = ap->ProcessStream(process_ptrs,
567                          process_frames,
568                          input_format_.sample_rate(),
569                          MapLayout(input_format_.channel_layout()),
570                          output_format_.sample_rate(),
571                          MapLayout(output_format_.channel_layout()),
572                          output_ptrs);
573  DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;
574
575  if (typing_detector_) {
576    webrtc::VoiceDetection* vad = ap->voice_detection();
577    DCHECK(vad->is_enabled());
578    bool detected = typing_detector_->Process(key_pressed,
579                                              vad->stream_has_voice());
580    base::subtle::Release_Store(&typing_detected_, detected);
581  }
582
583  // Return 0 if the volume hasn't been changed, and otherwise the new volume.
584  return (agc->stream_analog_level() == volume) ?
585      0 : agc->stream_analog_level();
586}
587
588}  // namespace content
589