1e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/*
2e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
4e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  Use of this source code is governed by a BSD-style license
5e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  that can be found in the LICENSE file in the root of the source
6e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  tree. An additional intellectual property rights grant can be found
7e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  in the file PATENTS.  All contributing project authors may
8e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  be found in the AUTHORS file in the root of the source tree.
9e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */
10e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
11e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
12e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
13e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
14c55a96383497a772a307b346368133960b02ad03Eric Laurent#include <stddef.h> // size_t
15c55a96383497a772a307b346368133960b02ad03Eric Laurent
16e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "typedefs.h"
17e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "module.h"
18e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
19e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentnamespace webrtc {
20e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
// Forward declarations of the audio frame type passed to the stream
// functions and of the component interfaces returned by the AudioProcessing
// accessors. Only pointers to these types are needed at this point.
class AudioFrame;
class EchoCancellation;
class EchoControlMobile;
class GainControl;
class HighPassFilter;
class LevelEstimator;
class NoiseSuppression;
class VoiceDetection;
29e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
30e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// The Audio Processing Module (APM) provides a collection of voice processing
31e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// components designed for real-time communications software.
32e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
33e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// APM operates on two audio streams on a frame-by-frame basis. Frames of the
34e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// primary stream, on which all processing is applied, are passed to
35e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// |ProcessStream()|. Frames of the reverse direction stream, which are used for
36e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// analysis by some components, are passed to |AnalyzeReverseStream()|. On the
37e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// client-side, this will typically be the near-end (capture) and far-end
38e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// (render) streams, respectively. APM should be placed in the signal chain as
39e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// close to the audio hardware abstraction layer (HAL) as possible.
40e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
41e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// On the server-side, the reverse stream will normally not be used, with
42e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// processing occurring on each incoming stream.
43e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
44e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// Component interfaces follow a similar pattern and are accessed through
45e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// corresponding getters in APM. All components are disabled at create-time,
46e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// with default settings that are recommended for most situations. New settings
47e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// can be applied without enabling a component. Enabling a component triggers
48e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// memory allocation and initialization to allow it to start processing the
49e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// streams.
50e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
51e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// Thread safety is provided with the following assumptions to reduce locking
52e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// overhead:
53e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//   1. The stream getters and setters are called from the same thread as
54e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//      ProcessStream(). More precisely, stream functions are never called
55e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//      concurrently with ProcessStream().
56e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//   2. Parameter getters are never called concurrently with the corresponding
57e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//      setter.
58e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
59e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// APM accepts only 16-bit linear PCM audio data in frames of 10 ms. Multiple
60e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// channels should be interleaved.
61e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
62e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// Usage example, omitting error checking:
63e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// AudioProcessing* apm = AudioProcessing::Create(0);
64e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->set_sample_rate_hz(32000); // Super-wideband processing.
65e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
66e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// // Mono capture and stereo render.
67e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->set_num_channels(1, 1);
68e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->set_num_reverse_channels(2);
69e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
70e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->high_pass_filter()->Enable(true);
71e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
72e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->echo_cancellation()->enable_drift_compensation(false);
73e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->echo_cancellation()->Enable(true);
74e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
// apm->noise_suppression()->set_level(kHighSuppression);
// apm->noise_suppression()->Enable(true);
77e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
78e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->gain_control()->set_analog_level_limits(0, 255);
79e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->gain_control()->set_mode(kAdaptiveAnalog);
80e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->gain_control()->Enable(true);
81e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
82e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->voice_detection()->Enable(true);
83e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
84e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// // Start a voice call...
85e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
86e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// // ... Render frame arrives bound for the audio HAL ...
87e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->AnalyzeReverseStream(render_frame);
88e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
89e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// // ... Capture frame arrives from the audio HAL ...
90e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// // Call required set_stream_ functions.
91e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->set_stream_delay_ms(delay_ms);
92e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->gain_control()->set_stream_analog_level(analog_level);
93e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
94e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->ProcessStream(capture_frame);
95e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
96e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// // Call required stream_ functions.
97e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// analog_level = apm->gain_control()->stream_analog_level();
// has_voice = apm->voice_detection()->stream_has_voice();
99e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
// // Repeat render and capture processing for the duration of the call...
101e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// // Start a new call...
102e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm->Initialize();
103e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
104e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// // Close the application...
105e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// AudioProcessing::Destroy(apm);
106e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// apm = NULL;
107e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
108e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentclass AudioProcessing : public Module {
109e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent public:
110e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Creates a APM instance, with identifier |id|. Use one instance for every
111e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // primary audio stream requiring processing. On the client-side, this would
112e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // typically be one instance for the near-end stream, and additional instances
113e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // for each far-end stream which requires processing. On the server-side,
114e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // this would typically be one instance for every incoming stream.
115e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  static AudioProcessing* Create(int id);
116c55a96383497a772a307b346368133960b02ad03Eric Laurent  virtual ~AudioProcessing() {};
117e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
118c55a96383497a772a307b346368133960b02ad03Eric Laurent  // TODO(andrew): remove this method. We now allow users to delete instances
119c55a96383497a772a307b346368133960b02ad03Eric Laurent  // directly, useful for scoped_ptr.
120e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Destroys a |apm| instance.
121e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  static void Destroy(AudioProcessing* apm);
122e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
123e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Initializes internal states, while retaining all user settings. This
124e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // should be called before beginning to process a new audio stream. However,
125e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // it is not necessary to call before processing the first stream after
126e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // creation.
127e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int Initialize() = 0;
128e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
129e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Sets the sample |rate| in Hz for both the primary and reverse audio
130e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // streams. 8000, 16000 or 32000 Hz are permitted.
131e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int set_sample_rate_hz(int rate) = 0;
132e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int sample_rate_hz() const = 0;
133e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
134e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Sets the number of channels for the primary audio stream. Input frames must
135e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // contain a number of channels given by |input_channels|, while output frames
136e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // will be returned with number of channels given by |output_channels|.
137e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int set_num_channels(int input_channels, int output_channels) = 0;
138e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int num_input_channels() const = 0;
139e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int num_output_channels() const = 0;
140e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
141e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Sets the number of channels for the reverse audio stream. Input frames must
142e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // contain a number of channels given by |channels|.
143e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int set_num_reverse_channels(int channels) = 0;
144e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int num_reverse_channels() const = 0;
145e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
146e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Processes a 10 ms |frame| of the primary audio stream. On the client-side,
147e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // this is the near-end (or captured) audio.
148e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //
149e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // If needed for enabled functionality, any function with the set_stream_ tag
150e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // must be called prior to processing the current frame. Any getter function
151e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // with the stream_ tag which is needed should be called after processing.
152e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //
153e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // The |_frequencyInHz|, |_audioChannel|, and |_payloadDataLengthInSamples|
154e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // members of |frame| must be valid, and correspond to settings supplied
155e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // to APM.
156e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int ProcessStream(AudioFrame* frame) = 0;
157e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
158e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
159e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // will not be modified. On the client-side, this is the far-end (or to be
160e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // rendered) audio.
161e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //
162e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // It is only necessary to provide this if echo processing is enabled, as the
163e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // reverse stream forms the echo reference signal. It is recommended, but not
164e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // necessary, to provide if gain control is enabled. On the server-side this
165e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // typically will not be used. If you're not sure what to pass in here,
166e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // chances are you don't need to use it.
167e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //
168e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // The |_frequencyInHz|, |_audioChannel|, and |_payloadDataLengthInSamples|
169e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // members of |frame| must be valid.
170e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //
171e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // TODO(ajm): add const to input; requires an implementation fix.
172e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
173e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
174e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // This must be called if and only if echo processing is enabled.
175e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //
176e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
177e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // frame and ProcessStream() receiving a near-end frame containing the
178e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // corresponding echo. On the client-side this can be expressed as
179e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //   delay = (t_render - t_analyze) + (t_process - t_capture)
180e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // where,
181e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //   - t_analyze is the time a frame is passed to AnalyzeReverseStream() and
182e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //     t_render is the time the first sample of the same frame is rendered by
183e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //     the audio hardware.
184e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //   - t_capture is the time the first sample of a frame is captured by the
185e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //     audio hardware and t_pull is the time the same frame is passed to
186e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //     ProcessStream().
187e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int set_stream_delay_ms(int delay) = 0;
188e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int stream_delay_ms() const = 0;
189e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
190e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Starts recording debugging information to a file specified by |filename|,
191e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // a NULL-terminated string. If there is an ongoing recording, the old file
192e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // will be closed, and recording will continue in the newly specified file.
193e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // An already existing file will be overwritten without warning.
194c55a96383497a772a307b346368133960b02ad03Eric Laurent  static const size_t kMaxFilenameSize = 1024;
195e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int StartDebugRecording(const char filename[kMaxFilenameSize]) = 0;
196e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
197e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Stops recording debugging information, and closes the file. Recording
198e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // cannot be resumed in the same file (without overwriting it).
199e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int StopDebugRecording() = 0;
200e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
201e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // These provide access to the component interfaces and should never return
202e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // NULL. The pointers will be valid for the lifetime of the APM instance.
203e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // The memory for these objects is entirely managed internally.
204e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual EchoCancellation* echo_cancellation() const = 0;
205e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual EchoControlMobile* echo_control_mobile() const = 0;
206e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual GainControl* gain_control() const = 0;
207e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual HighPassFilter* high_pass_filter() const = 0;
208e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual LevelEstimator* level_estimator() const = 0;
209e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual NoiseSuppression* noise_suppression() const = 0;
210e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual VoiceDetection* voice_detection() const = 0;
211e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
212e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  struct Statistic {
213e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    int instant;  // Instantaneous value.
214e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    int average;  // Long-term average.
215e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    int maximum;  // Long-term maximum.
216e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    int minimum;  // Long-term minimum.
217e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  };
218e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
219e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Fatal errors.
220e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  enum Errors {
221e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kNoError = 0,
222e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kUnspecifiedError = -1,
223e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kCreationFailedError = -2,
224e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kUnsupportedComponentError = -3,
225e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kUnsupportedFunctionError = -4,
226e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kNullPointerError = -5,
227e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kBadParameterError = -6,
228e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kBadSampleRateError = -7,
229e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kBadDataLengthError = -8,
230e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kBadNumberChannelsError = -9,
231e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kFileError = -10,
232e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kStreamParameterNotSetError = -11,
233e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kNotEnabledError = -12
234e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  };
235e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
236e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Warnings are non-fatal.
237e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  enum Warnings {
238e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    // This results when a set_stream_ parameter is out of range. Processing
239e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    // will continue, but the parameter may have been truncated.
240e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kBadStreamParameterWarning = -13,
241e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  };
242e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
243e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Inherited from Module.
244e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual WebRtc_Word32 TimeUntilNextProcess() { return -1; };
245e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual WebRtc_Word32 Process() { return -1; };
246e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent};
247e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
248e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// The acoustic echo cancellation (AEC) component provides better performance
249e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// than AECM but also requires more processing power and is dependent on delay
250e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// stability and reporting accuracy. As such it is well-suited and recommended
251e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// for PC and IP phone applications.
252e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent//
253e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent// Not recommended to be enabled on the server-side.
254e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentclass EchoCancellation {
255e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent public:
256e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // EchoCancellation and EchoControlMobile may not be enabled simultaneously.
257e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Enabling one will disable the other.
258e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int Enable(bool enable) = 0;
259e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual bool is_enabled() const = 0;
260e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
261e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Differences in clock speed on the primary and reverse streams can impact
262e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // the AEC performance. On the client-side, this could be seen when different
263e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // render and capture devices are used, particularly with webcams.
264e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //
265e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // This enables a compensation mechanism, and requires that
266e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // |set_device_sample_rate_hz()| and |set_stream_drift_samples()| be called.
267e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int enable_drift_compensation(bool enable) = 0;
268e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual bool is_drift_compensation_enabled() const = 0;
269e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
270e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Provides the sampling rate of the audio devices. It is assumed the render
271e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // and capture devices use the same nominal sample rate. Required if and only
272e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // if drift compensation is enabled.
273e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int set_device_sample_rate_hz(int rate) = 0;
274e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int device_sample_rate_hz() const = 0;
275e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
276e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Sets the difference between the number of samples rendered and captured by
277e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // the audio devices since the last call to |ProcessStream()|. Must be called
278e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // if and only if drift compensation is enabled, prior to |ProcessStream()|.
279e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int set_stream_drift_samples(int drift) = 0;
280e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int stream_drift_samples() const = 0;
281e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
282e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  enum SuppressionLevel {
283e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kLowSuppression,
284e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kModerateSuppression,
285e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    kHighSuppression
286e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  };
287e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
288e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Sets the aggressiveness of the suppressor. A higher level trades off
289e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // double-talk performance for increased echo suppression.
290e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int set_suppression_level(SuppressionLevel level) = 0;
291e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual SuppressionLevel suppression_level() const = 0;
292e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
293e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Returns false if the current frame almost certainly contains no echo
294e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // and true if it _might_ contain echo.
295e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual bool stream_has_echo() const = 0;
296e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
297e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Enables the computation of various echo metrics. These are obtained
298e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // through |GetMetrics()|.
299e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int enable_metrics(bool enable) = 0;
300e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual bool are_metrics_enabled() const = 0;
301e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
302e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // Each statistic is reported in dB.
303e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // P_far:  Far-end (render) signal power.
304e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // P_echo: Near-end (capture) echo signal power.
305e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // P_out:  Signal power at the output of the AEC.
306e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // P_a:    Internal signal power at the point before the AEC's non-linear
307e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //         processor.
308e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  struct Metrics {
309e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    // RERL = ERL + ERLE
310e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    AudioProcessing::Statistic residual_echo_return_loss;
311e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
312e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    // ERL = 10log_10(P_far / P_echo)
313e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    AudioProcessing::Statistic echo_return_loss;
314e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
315e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    // ERLE = 10log_10(P_echo / P_out)
316e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    AudioProcessing::Statistic echo_return_loss_enhancement;
317e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
318e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a)
319e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    AudioProcessing::Statistic a_nlp;
320e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  };
321e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
322e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // TODO(ajm): discuss the metrics update period.
323e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual int GetMetrics(Metrics* metrics) = 0;
324e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
325c55a96383497a772a307b346368133960b02ad03Eric Laurent  // Enables computation and logging of delay values. Statistics are obtained
326c55a96383497a772a307b346368133960b02ad03Eric Laurent  // through |GetDelayMetrics()|.
327c55a96383497a772a307b346368133960b02ad03Eric Laurent  virtual int enable_delay_logging(bool enable) = 0;
328c55a96383497a772a307b346368133960b02ad03Eric Laurent  virtual bool is_delay_logging_enabled() const = 0;
329c55a96383497a772a307b346368133960b02ad03Eric Laurent
330c55a96383497a772a307b346368133960b02ad03Eric Laurent  // The delay metrics consists of the delay |median| and the delay standard
331c55a96383497a772a307b346368133960b02ad03Eric Laurent  // deviation |std|. The values are averaged over the time period since the
332c55a96383497a772a307b346368133960b02ad03Eric Laurent  // last call to |GetDelayMetrics()|.
333c55a96383497a772a307b346368133960b02ad03Eric Laurent  virtual int GetDelayMetrics(int* median, int* std) = 0;
334c55a96383497a772a307b346368133960b02ad03Eric Laurent
335e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent protected:
336e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  virtual ~EchoCancellation() {};
337e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent};
338e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
// The acoustic echo control for mobile (AECM) component is a low complexity
// robust option intended for use on mobile devices.
//
// Not recommended to be enabled on the server-side.
class EchoControlMobile {
 public:
  // EchoCancellation and EchoControlMobile may not be enabled simultaneously.
  // Enabling one will disable the other.
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Recommended settings for particular audio routes. In general, the louder
  // the echo is expected to be, the higher this value should be set. The
  // preferred setting may vary from device to device.
  enum RoutingMode {
    kQuietEarpieceOrHeadset,
    kEarpiece,
    kLoudEarpiece,
    kSpeakerphone,
    kLoudSpeakerphone
  };

  // Sets echo control appropriate for the audio routing |mode| on the device.
  // It can and should be updated during a call if the audio routing changes.
  virtual int set_routing_mode(RoutingMode mode) = 0;
  virtual RoutingMode routing_mode() const = 0;

  // Comfort noise replaces suppressed background noise to maintain a
  // consistent signal level.
  virtual int enable_comfort_noise(bool enable) = 0;
  virtual bool is_comfort_noise_enabled() const = 0;

  // A typical use case is to initialize the component with an echo path from a
  // previous call. The echo path is retrieved using |GetEchoPath()|, typically
  // at the end of a call. The data can then be stored for later use as an
  // initializer before the next call, using |SetEchoPath()|.
  //
  // Controlling the echo path this way requires the data |size_bytes| to match
  // the internal echo path size. This size can be acquired using
  // |echo_path_size_bytes()|.
  //
  // Note that |SetEchoPath()| causes an entire reset, which matters if it is
  // called during an ongoing call.
  //
  // It is possible that version incompatibilities may result in a stored echo
  // path of the incorrect size. In this case, the stored path should be
  // discarded.
  virtual int SetEchoPath(const void* echo_path, size_t size_bytes) = 0;
  virtual int GetEchoPath(void* echo_path, size_t size_bytes) const = 0;

  // The returned path size is guaranteed not to change for the lifetime of
  // the application.
  static size_t echo_path_size_bytes();

 protected:
  // Protected: the component cannot be destroyed through this interface.
  virtual ~EchoControlMobile() {}
};
394e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
// The automatic gain control (AGC) component brings the signal to an
// appropriate range. This is done by applying a digital gain directly and, in
// the analog mode, prescribing an analog gain to be applied at the audio HAL.
//
// Recommended to be enabled on the client-side.
class GainControl {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // When an analog mode is set, this must be called prior to |ProcessStream()|
  // to pass the current analog level from the audio HAL. Must be within the
  // range provided to |set_analog_level_limits()|.
  virtual int set_stream_analog_level(int level) = 0;

  // When an analog mode is set, this should be called after |ProcessStream()|
  // to obtain the recommended new analog level for the audio HAL. It is the
  // user's responsibility to apply this level.
  virtual int stream_analog_level() = 0;

  enum Mode {
    // Adaptive mode intended for use if an analog volume control is available
    // on the capture device. It will require the user to provide coupling
    // between the OS mixer controls and AGC through the
    // |set_stream_analog_level()| and |stream_analog_level()| functions.
    //
    // It consists of an analog gain prescription for the audio device and a
    // digital compression stage.
    kAdaptiveAnalog,

    // Adaptive mode intended for situations in which an analog volume control
    // is unavailable. It operates in a similar fashion to the adaptive analog
    // mode, but with scaling instead applied in the digital domain. As with
    // the analog mode, it additionally uses a digital compression stage.
    kAdaptiveDigital,

    // Fixed mode which enables only the digital compression stage also used by
    // the two adaptive modes.
    //
    // It is distinguished from the adaptive modes by considering only a
    // short time-window of the input signal. It applies a fixed gain through
    // most of the input level range, and compresses (gradually reduces gain
    // with increasing level) the input signal at higher levels. This mode is
    // preferred on embedded devices where the capture signal level is
    // predictable, so that a known gain can be applied.
    kFixedDigital
  };

  virtual int set_mode(Mode mode) = 0;
  virtual Mode mode() const = 0;

  // Sets the target peak |level| (or envelope) of the AGC in dBFS (decibels
  // from digital full-scale). The convention is to use positive values. For
  // instance, passing in a value of 3 corresponds to -3 dBFS, or a target
  // level 3 dB below full-scale. Limited to [0, 31].
  //
  // TODO(ajm): use a negative value here instead, if/when VoE will similarly
  //            update its interface.
  virtual int set_target_level_dbfs(int level) = 0;
  virtual int target_level_dbfs() const = 0;

  // Sets the maximum |gain| the digital compression stage may apply, in dB. A
  // higher number corresponds to greater compression, while a value of 0 will
  // leave the signal uncompressed. Limited to [0, 90].
  virtual int set_compression_gain_db(int gain) = 0;
  virtual int compression_gain_db() const = 0;

  // When enabled, the compression stage will hard limit the signal to the
  // target level. Otherwise, the signal will be compressed but not limited
  // above the target level.
  virtual int enable_limiter(bool enable) = 0;
  virtual bool is_limiter_enabled() const = 0;

  // Sets the |minimum| and |maximum| analog levels of the audio capture device.
  // Must be set if and only if an analog mode is used. Limited to [0, 65535].
  virtual int set_analog_level_limits(int minimum,
                                      int maximum) = 0;
  virtual int analog_level_minimum() const = 0;
  virtual int analog_level_maximum() const = 0;

  // Returns true if the AGC has detected a saturation event (period where the
  // signal reaches digital full-scale) in the current frame and the analog
  // level cannot be reduced.
  //
  // This could be used as an indicator to reduce or disable analog mic gain at
  // the audio HAL.
  virtual bool stream_is_saturated() const = 0;

 protected:
  // Protected: the component cannot be destroyed through this interface.
  virtual ~GainControl() {}
};
486e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
// A filtering component which removes DC offset and low-frequency noise.
// Recommended to be enabled on the client-side.
class HighPassFilter {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

 protected:
  // Protected: the component cannot be destroyed through this interface.
  virtual ~HighPassFilter() {}
};
497e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
// An estimation component used to retrieve level metrics.
class LevelEstimator {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Returns the root mean square (RMS) level in dBFS (decibels from digital
  // full-scale), or alternately dBov. It is computed over all primary stream
  // frames since the last call to |RMS()|. The returned value is positive but
  // should be interpreted as negative. It is constrained to [0, 127].
  //
  // The computation follows:
  // http://tools.ietf.org/html/draft-ietf-avtext-client-to-mixer-audio-level-05
  // with the intent that it can provide the RTP audio level indication.
  //
  // Frames passed to |ProcessStream()| with an |_energy| of zero are
  // considered to have been muted. The RMS of the frame will be interpreted as
  // -127.
  virtual int RMS() = 0;

 protected:
  // Protected: the component cannot be destroyed through this interface.
  virtual ~LevelEstimator() {}
};
520e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
// The noise suppression (NS) component attempts to remove noise while
// retaining speech.
//
// Recommended to be enabled on the client-side.
class NoiseSuppression {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Determines the aggressiveness of the suppression. Increasing the level
  // will reduce the noise level at the expense of a higher speech distortion.
  enum Level {
    kLow,
    kModerate,
    kHigh,
    kVeryHigh
  };

  virtual int set_level(Level level) = 0;
  virtual Level level() const = 0;

 protected:
  // Protected: the component cannot be destroyed through this interface.
  virtual ~NoiseSuppression() {}
};
545e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
// The voice activity detection (VAD) component analyzes the stream to
// determine if voice is present. A facility is also provided to pass in an
// external VAD decision.
//
// In addition to |stream_has_voice()| the VAD decision is provided through the
// |AudioFrame| passed to |ProcessStream()|. The |_vadActivity| member will be
// modified to reflect the current decision.
class VoiceDetection {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Returns true if voice is detected in the current frame. Should be called
  // after |ProcessStream()|.
  virtual bool stream_has_voice() const = 0;

  // Some of the APM functionality requires a VAD decision. In the case that
  // a decision is externally available for the current frame, it can be passed
  // in here, before |ProcessStream()| is called.
  //
  // VoiceDetection does _not_ need to be enabled to use this. If it happens to
  // be enabled, detection will be skipped for any frame in which an external
  // VAD decision is provided.
  virtual int set_stream_has_voice(bool has_voice) = 0;

  // Specifies the likelihood that a frame will be declared to contain voice.
  // A higher value makes it more likely that speech will not be clipped, at
  // the expense of more noise being detected as voice.
  enum Likelihood {
    kVeryLowLikelihood,
    kLowLikelihood,
    kModerateLikelihood,
    kHighLikelihood
  };

  virtual int set_likelihood(Likelihood likelihood) = 0;
  virtual Likelihood likelihood() const = 0;

  // Sets the |size| of the frames in ms on which the VAD will operate. Larger
  // frames will improve detection accuracy, but reduce the frequency of
  // updates.
  //
  // This does not impact the size of frames passed to |ProcessStream()|.
  virtual int set_frame_size_ms(int size) = 0;
  virtual int frame_size_ms() const = 0;

 protected:
  // Protected: the component cannot be destroyed through this interface.
  virtual ~VoiceDetection() {}
};
595e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}  // namespace webrtc
596e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
597e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
598