1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
6#define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
7
#include <list>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/synchronization/lock.h"
#include "base/threading/thread_checker.h"
#include "base/time/time.h"
#include "content/common/content_export.h"
#include "content/renderer/media/webrtc_audio_capturer.h"
#include "content/renderer/media/webrtc_audio_device_not_impl.h"
#include "content/renderer/media/webrtc_audio_renderer.h"
#include "media/base/audio_capturer_source.h"
#include "media/base/audio_renderer_sink.h"
23
24// A WebRtcAudioDeviceImpl instance implements the abstract interface
25// webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
26// VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
27// Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
28// session id that tells which device to use. The user can then call
29// WebRtcAudioDeviceImpl::StartPlayout() and
30// WebRtcAudioDeviceImpl::StartRecording() from the render process to initiate
31// and start audio rendering and capturing in the browser process. IPC is
32// utilized to set up the media streams.
33//
34// Usage example:
35//
36//   using namespace webrtc;
37//
38//   {
39//      scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
40//      external_adm = new WebRtcAudioDeviceImpl();
41//      external_adm->SetSessionId(session_id);
42//      VoiceEngine* voe = VoiceEngine::Create();
43//      VoEBase* base = VoEBase::GetInterface(voe);
44//      base->Init(external_adm);
45//      int ch = base->CreateChannel();
46//      ...
47//      base->StartReceive(ch)
48//      base->StartPlayout(ch);
49//      base->StartSending(ch);
50//      ...
51//      <== full-duplex audio session with AGC enabled ==>
52//      ...
53//      base->DeleteChannel(ch);
54//      base->Terminate();
55//      base->Release();
56//      VoiceEngine::Delete(voe);
57//   }
58//
59// webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
60//
61//  RegisterAudioCallback(this)
//    webrtc::VoiceEngine is a webrtc::AudioTransport implementation and
//    implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
64//
65//  Init()
66//    Creates and initializes the AudioOutputDevice and AudioInputDevice
67//    objects.
68//
69//  SetAGC(true)
70//    Enables the adaptive analog mode of the AGC which ensures that a
71//    suitable microphone volume level will be set. This scheme will affect
72//    the actual microphone control slider.
73//
74// AGC overview:
75//
76// It aims to maintain a constant speech loudness level from the microphone.
77// This is done by both controlling the analog microphone gain and applying
78// digital gain. The microphone gain on the sound card is slowly
79// increased/decreased during speech only. By observing the microphone control
80// slider you can see it move when you speak. If you scream, the slider moves
81// downwards and then upwards again when you return to normal. It is not
82// uncommon that the slider hits the maximum. This means that the maximum
83// analog gain is not large enough to give the desired loudness. Nevertheless,
84// we can in general still attain the desired loudness. If the microphone
85// control slider is moved manually, the gain adaptation restarts and returns
86// to roughly the same position as before the change if the circumstances are
87// still the same. When the input microphone signal causes saturation, the
88// level is decreased dramatically and has to re-adapt towards the old level.
89// The adaptation is a slowly varying process and at the beginning of capture
// this is noticed by a slow increase in volume. Smaller changes in microphone
// input level are leveled out by the built-in digital control. For larger
92// differences we need to rely on the slow adaptation.
93// See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
94//
95// AGC implementation details:
96//
97// The adaptive analog mode of the AGC is always enabled for desktop platforms
98// in WebRTC.
99//
100// Before recording starts, the ADM enables AGC on the AudioInputDevice.
101//
102// A capture session with AGC is started up as follows (simplified):
103//
104//                            [renderer]
105//                                |
106//                     ADM::StartRecording()
107//             AudioInputDevice::InitializeOnIOThread()
108//           AudioInputHostMsg_CreateStream(..., agc=true)               [IPC]
109//                                |
110//                       [IPC to the browser]
111//                                |
112//              AudioInputRendererHost::OnCreateStream()
113//              AudioInputController::CreateLowLatency()
114//         AudioInputController::DoSetAutomaticGainControl(true)
115//            AudioInputStream::SetAutomaticGainControl(true)
116//                                |
117// AGC is now enabled in the media layer and streaming starts (details omitted).
118// The figure below illustrates the AGC scheme which is active in combination
119// with the default media flow explained earlier.
120//                                |
121//                            [browser]
122//                                |
123//                AudioInputStream::(Capture thread loop)
124//  AgcAudioStream<AudioInputStream>::GetAgcVolume() => get latest mic volume
125//                 AudioInputData::OnData(..., volume)
126//              AudioInputController::OnData(..., volume)
127//               AudioInputSyncWriter::Write(..., volume)
128//                                |
129//      [volume | size | data] is sent to the renderer         [shared memory]
130//                                |
131//                            [renderer]
132//                                |
133//          AudioInputDevice::AudioThreadCallback::Process()
134//            WebRtcAudioDeviceImpl::Capture(..., volume)
135//    AudioTransport::RecordedDataIsAvailable(...,volume, new_volume)
136//                                |
// The AGC now uses the current volume input and computes a suitable new
// level given by the |new_volume| output. This value is only non-zero if the
// AGC has taken a decision that the microphone level should change.
140//                                |
141//                      if (new_volume != 0)
142//              AudioInputDevice::SetVolume(new_volume)
143//              AudioInputHostMsg_SetVolume(new_volume)                  [IPC]
144//                                |
145//                       [IPC to the browser]
146//                                |
147//                 AudioInputRendererHost::OnSetVolume()
148//                  AudioInputController::SetVolume()
149//             AudioInputStream::SetVolume(scaled_volume)
150//                                |
151// Here we set the new microphone level in the media layer and at the same time
152// read the new setting (we might not get exactly what is set).
153//                                |
154//             AudioInputData::OnData(..., updated_volume)
155//           AudioInputController::OnData(..., updated_volume)
156//                                |
157//                                |
// This process repeats until we stop capturing data. Note that a common
159// steady state is that the volume control reaches its max and the new_volume
160// value from the AGC is zero. A loud voice input is required to break this
161// state and start lowering the level again.
162//
163// Implementation notes:
164//
165//  - This class must be created and destroyed on the main render thread and
166//    most methods are called on the same thread. However, some methods are
167//    also called on a Libjingle worker thread. RenderData is called on the
168//    AudioOutputDevice thread and CaptureData on the AudioInputDevice thread.
169//    To summarize: this class lives on four different threads.
170//  - The webrtc::AudioDeviceModule is reference counted.
171//  - AGC is only supported in combination with the WASAPI-based audio layer
172//    on Windows, i.e., it is not supported on Windows XP.
//  - All volume levels required for the AGC scheme are transferred in a
//    normalized range [0.0, 1.0]. Scaling takes place in both endpoints
//    (WebRTC client and media layer). This approach ensures that we can avoid
//    transferring maximum levels between the renderer and the browser.
177//
178
179namespace content {
180
181class WebRtcAudioCapturer;
182class WebRtcAudioRenderer;
183
// TODO(xians): Move the following two interfaces to webrtc so that
// libjingle can own references to the renderer and capturer.
//
// Source of rendered audio data for a WebRtcAudioRenderer. Implemented by
// WebRtcAudioDeviceImpl below.
class WebRtcAudioRendererSource {
 public:
  // Callback to get the rendered interleaved data.
  // |audio_data| points to the buffer to be filled with
  // |number_of_frames| frames of |number_of_channels|-channel interleaved
  // audio; |audio_delay_milliseconds| is the current output delay.
  // Called on the AudioOutputDevice worker thread (see the implementation
  // notes at the top of this file).
  // TODO(xians): Change uint8* to int16*.
  virtual void RenderData(uint8* audio_data,
                          int number_of_channels,
                          int number_of_frames,
                          int audio_delay_milliseconds) = 0;

  // Sets the format of the rendered audio parameters. (The method name makes
  // clear these are the render-side parameters, not capture.)
  virtual void SetRenderFormat(const media::AudioParameters& params) = 0;

  // Callback to notify the client that |renderer| is going away.
  virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) = 0;

 protected:
  // Non-virtual-dtor-through-interface deletion is prevented by making the
  // destructor protected; instances are never deleted via this interface.
  virtual ~WebRtcAudioRendererSource() {}
};
204
// Sink that receives captured audio data destined for the WebRtc voice
// engine. Implemented by WebRtcAudioDeviceImpl below.
class PeerConnectionAudioSink {
 public:
  // Callback to deliver the captured interleaved data.
  // |channels| contains a vector of WebRtc VoE channels.
  // |audio_data| is the pointer to the audio data.
  // |sample_rate| is the sample frequency of audio data.
  // |number_of_channels| is the number of channels reflecting the order of
  // surround sound channels.
  // |number_of_frames| is the number of frames per channel in |audio_data|.
  // |audio_delay_milliseconds| is recording delay value.
  // |current_volume| is current microphone volume, in range of [0, 255].
  // |need_audio_processing| indicates if the audio needs WebRtc AEC/NS/AGC
  // audio processing.
  // |key_pressed| presumably signals that a key was pressed during this
  // capture period (typing detection) — verify against the capturer.
  // The return value is the new microphone volume, in the range of [0, 255].
  // When the volume does not need to be updated, it returns 0.
  // Called on the AudioInputDevice worker thread (see the note on OnData()
  // in the implementing class).
  virtual int OnData(const int16* audio_data,
                     int sample_rate,
                     int number_of_channels,
                     int number_of_frames,
                     const std::vector<int>& channels,
                     int audio_delay_milliseconds,
                     int current_volume,
                     bool need_audio_processing,
                     bool key_pressed) = 0;

  // Set the format for the capture audio parameters.
  // This is called when the capture format has changed, and it must be called
  // on the same thread as calling CaptureData().
  virtual void OnSetFormat(const media::AudioParameters& params) = 0;

 protected:
 virtual ~PeerConnectionAudioSink() {}
};
237
// Note that this class inherits from webrtc::AudioDeviceModule but due to
// the high number of non-implemented methods, we move the cruft over to the
// WebRtcAudioDeviceNotImpl.
class CONTENT_EXPORT WebRtcAudioDeviceImpl
    : NON_EXPORTED_BASE(public PeerConnectionAudioSink),
      NON_EXPORTED_BASE(public WebRtcAudioDeviceNotImpl),
      NON_EXPORTED_BASE(public WebRtcAudioRendererSource) {
 public:
  // The maximum volume value WebRtc uses.
  static const int kMaxVolumeLevel = 255;

  // Instances of this object are created on the main render thread.
  WebRtcAudioDeviceImpl();

  // webrtc::RefCountedModule implementation.
  // The creator must call AddRef() after construction and use Release()
  // to release the reference and delete this object.
  // Called on the main render thread.
  virtual int32_t AddRef() OVERRIDE;
  virtual int32_t Release() OVERRIDE;

  // webrtc::AudioDeviceModule implementation.
  // All implemented methods are called on the main render thread unless
  // anything else is stated.

  // Registers the weak |audio_callback| that receives capture/render
  // callbacks; see |audio_transport_callback_| below.
  virtual int32_t RegisterAudioCallback(webrtc::AudioTransport* audio_callback)
      OVERRIDE;

  virtual int32_t Init() OVERRIDE;
  virtual int32_t Terminate() OVERRIDE;
  virtual bool Initialized() const OVERRIDE;

  virtual int32_t PlayoutIsAvailable(bool* available) OVERRIDE;
  virtual bool PlayoutIsInitialized() const OVERRIDE;
  virtual int32_t RecordingIsAvailable(bool* available) OVERRIDE;
  virtual bool RecordingIsInitialized() const OVERRIDE;

  // All Start/Stop methods are called on a libJingle worker thread.
  virtual int32_t StartPlayout() OVERRIDE;
  virtual int32_t StopPlayout() OVERRIDE;
  virtual bool Playing() const OVERRIDE;
  virtual int32_t StartRecording() OVERRIDE;
  virtual int32_t StopRecording() OVERRIDE;
  virtual bool Recording() const OVERRIDE;

  // Called on the AudioInputDevice worker thread.
  virtual int32_t SetMicrophoneVolume(uint32_t volume) OVERRIDE;

  // TODO(henrika): sort out calling thread once we start using this API.
  virtual int32_t MicrophoneVolume(uint32_t* volume) const OVERRIDE;

  virtual int32_t MaxMicrophoneVolume(uint32_t* max_volume) const OVERRIDE;
  virtual int32_t MinMicrophoneVolume(uint32_t* min_volume) const OVERRIDE;
  virtual int32_t StereoPlayoutIsAvailable(bool* available) const OVERRIDE;
  virtual int32_t StereoRecordingIsAvailable(bool* available) const OVERRIDE;
  virtual int32_t PlayoutDelay(uint16_t* delay_ms) const OVERRIDE;
  virtual int32_t RecordingDelay(uint16_t* delay_ms) const OVERRIDE;
  virtual int32_t RecordingSampleRate(uint32_t* samples_per_sec) const OVERRIDE;
  virtual int32_t PlayoutSampleRate(uint32_t* samples_per_sec) const OVERRIDE;

  // Sets the |renderer_|, returns false if |renderer_| already exists.
  // Called on the main renderer thread.
  bool SetAudioRenderer(WebRtcAudioRenderer* renderer);

  // Adds the capturer to the ADM.
  void AddAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);

  // Gets the default capturer, which is the capturer in the list with
  // a valid |device_id|. Microphones are represented by capturers with a valid
  // |device_id|, since only one microphone is supported today, only one
  // capturer in the |capturers_| can have a valid |device_id|.
  scoped_refptr<WebRtcAudioCapturer> GetDefaultCapturer() const;

  // Accessors for the current renderer and the cached output (render side)
  // audio parameters.
  const scoped_refptr<WebRtcAudioRenderer>& renderer() const {
    return renderer_;
  }
  int output_buffer_size() const {
    return output_audio_parameters_.frames_per_buffer();
  }
  int output_channels() const {
    return output_audio_parameters_.channels();
  }
  int output_sample_rate() const {
    return output_audio_parameters_.sample_rate();
  }

 private:
  typedef std::list<scoped_refptr<WebRtcAudioCapturer> > CapturerList;

  // Make destructor private to ensure that we can only be deleted by Release().
  virtual ~WebRtcAudioDeviceImpl();

  // PeerConnectionAudioSink implementation.

  // Called on the AudioInputDevice worker thread.
  virtual int OnData(const int16* audio_data,
                     int sample_rate,
                     int number_of_channels,
                     int number_of_frames,
                     const std::vector<int>& channels,
                     int audio_delay_milliseconds,
                     int current_volume,
                     bool need_audio_processing,
                     bool key_pressed) OVERRIDE;

  // Called on the AudioInputDevice worker thread.
  virtual void OnSetFormat(const media::AudioParameters& params) OVERRIDE;

  // WebRtcAudioRendererSource implementation.

  // Called on the AudioOutputDevice worker thread (see the "Implementation
  // notes" section in the file-level comment).
  virtual void RenderData(uint8* audio_data,
                          int number_of_channels,
                          int number_of_frames,
                          int audio_delay_milliseconds) OVERRIDE;

  // Called on the main render thread.
  virtual void SetRenderFormat(const media::AudioParameters& params) OVERRIDE;
  virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) OVERRIDE;

  // Used to DCHECK that we are called on the correct thread.
  base::ThreadChecker thread_checker_;

  // Manual reference count, managed by AddRef()/Release().
  int ref_count_;

  // List of capturers which provides access to the native audio input layer
  // in the browser process.
  CapturerList capturers_;

  // Provides access to the audio renderer in the browser process.
  scoped_refptr<WebRtcAudioRenderer> renderer_;

  // Weak reference to the audio callback.
  // The webrtc client defines |audio_transport_callback_| by calling
  // RegisterAudioCallback().
  webrtc::AudioTransport* audio_transport_callback_;

  // Cached values of used output audio parameters. Platform dependent.
  media::AudioParameters output_audio_parameters_;

  // Cached value of the current audio delay on the input/capture side.
  int input_delay_ms_;

  // Cached value of the current audio delay on the output/renderer side.
  int output_delay_ms_;

  // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|, |renderer_|
  // and |microphone_volume_|.
  mutable base::Lock lock_;

  bool initialized_;
  bool playing_;
  bool recording_;

  // Used for histograms of total recording and playout times.
  base::Time start_capture_time_;
  base::Time start_render_time_;

  // Stores latest microphone volume received in an OnData() callback
  // (formerly CaptureData()). Range is [0, 255].
  uint32_t microphone_volume_;

  DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl);
};
402
403}  // namespace content
404
405#endif  // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
406