1a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org/*
2a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org *
4a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org *  Use of this source code is governed by a BSD-style license
5a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org *  that can be found in the LICENSE file in the root of the source
6a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org *  tree. An additional intellectual property rights grant can be found
7a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org *  in the file PATENTS.  All contributing project authors may
8a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org *  be found in the AUTHORS file in the root of the source tree.
9a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org */
10a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
11b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
12b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#define WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
13a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
14b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#include <stddef.h>  // size_t
15c8bd97520bca8aae52771986731cbdf30dc03252henrikg@webrtc.org#include <stdio.h>  // FILE
16a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
1726a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org#include "webrtc/base/platform_file.h"
18d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org#include "webrtc/common.h"
19b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#include "webrtc/typedefs.h"
20a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
21333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.orgstruct AecCore;
22333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.org
23a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgnamespace webrtc {
24a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
25a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass AudioFrame;
26a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass EchoCancellation;
27a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass EchoControlMobile;
28a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass GainControl;
29a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass HighPassFilter;
30a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass LevelEstimator;
31a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass NoiseSuppression;
32a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass VoiceDetection;
33a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
// Use to enable the delay correction feature. This now engages an extended
// filter mode in the AEC, along with robustness measures around the reported
// system delays. It comes with a significant increase in AEC complexity, but is
// much more robust to unreliable reported delays.
//
// Detailed changes to the algorithm:
// - The filter length is changed from 48 to 128 ms. This comes with tuning of
//   several parameters: i) filter adaptation stepsize and error threshold;
//   ii) non-linear processing smoothing and overdrive.
// - Option to ignore the reported delays on platforms which we deem
//   sufficiently unreliable. See WEBRTC_UNTRUSTED_DELAY in echo_cancellation.c.
// - Faster startup times by removing the excessive "startup phase" processing
//   of reported delays.
// - Much more conservative adjustments to the far-end read pointer. We smooth
//   the delay difference more heavily, and back off from the difference more.
//   Adjustments force a readaptation of the filter, so they should be avoided
//   except when really necessary.
//
// A default-constructed instance leaves the feature disabled.
struct DelayCorrection {
  DelayCorrection() : enabled(false) {}
  // Explicit so that a bare bool never converts silently into this option.
  explicit DelayCorrection(bool enabled) : enabled(enabled) {}
  // True when delay correction is requested.
  bool enabled;
};
56870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org
// Use to disable the reported system delays. By disabling the reported system
// delays the echo cancellation algorithm assumes the process and reverse
// streams to be aligned. This configuration only applies to EchoCancellation
// and not EchoControlMobile and is set with AudioProcessing::SetExtraOptions().
// Note that by disabling reported system delays the EchoCancellation may
// regress in performance.
//
// A default-constructed instance keeps the reported delays enabled.
struct ReportedDelay {
  ReportedDelay() : enabled(true) {}
  // Explicit so that a bare bool never converts silently into this option.
  explicit ReportedDelay(bool enabled) : enabled(enabled) {}
  // True when the reported system delays should be used.
  bool enabled;
};
6884ba16898e487bd64f8e7e44220ee22963f55f24bjornv@webrtc.org
// Must be provided through AudioProcessing::Create(const Config&). It will
// have no impact if used with AudioProcessing::SetExtraOptions().
//
// A default-constructed instance has |enabled| set to true.
struct ExperimentalAgc {
  ExperimentalAgc() : enabled(true) {}
  // Explicit so that a bare bool never converts silently into this option.
  explicit ExperimentalAgc(bool enabled) : enabled(enabled) {}
  // True when the experimental AGC is requested.
  bool enabled;
};
76870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org
// Use to enable experimental noise suppression. It can be set in the
// constructor or using AudioProcessing::SetExtraOptions().
//
// A default-constructed instance leaves the feature disabled.
struct ExperimentalNs {
  ExperimentalNs() : enabled(false) {}
  // Explicit so that a bare bool never converts silently into this option.
  explicit ExperimentalNs(bool enabled) : enabled(enabled) {}
  // True when experimental noise suppression is requested.
  bool enabled;
};
8491e936fcd7b48a3a2f2a56092e5e512c1dfe98afaluebs@webrtc.org
// Sample rate ceiling in Hz. NOTE(review): judging by the name this is the
// highest rate APM handles natively; confirm against the native rate list.
static const int kAudioProcMaxNativeSampleRateHz = 32000;
86bf4f2321ef4e31f8b973edb62ba2a81d8ec8bb01andrew@webrtc.org
87a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// The Audio Processing Module (APM) provides a collection of voice processing
88a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// components designed for real-time communications software.
89a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
90a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// APM operates on two audio streams on a frame-by-frame basis. Frames of the
91a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// primary stream, on which all processing is applied, are passed to
92a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// |ProcessStream()|. Frames of the reverse direction stream, which are used for
93a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// analysis by some components, are passed to |AnalyzeReverseStream()|. On the
94a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// client-side, this will typically be the near-end (capture) and far-end
95a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// (render) streams, respectively. APM should be placed in the signal chain as
96a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// close to the audio hardware abstraction layer (HAL) as possible.
97a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
98a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// On the server-side, the reverse stream will normally not be used, with
99a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// processing occurring on each incoming stream.
100a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
101a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// Component interfaces follow a similar pattern and are accessed through
102a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// corresponding getters in APM. All components are disabled at create-time,
103a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// with default settings that are recommended for most situations. New settings
104a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// can be applied without enabling a component. Enabling a component triggers
105a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// memory allocation and initialization to allow it to start processing the
106a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// streams.
107a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
108a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// Thread safety is provided with the following assumptions to reduce locking
109a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// overhead:
110a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//   1. The stream getters and setters are called from the same thread as
111a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//      ProcessStream(). More precisely, stream functions are never called
112a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//      concurrently with ProcessStream().
113a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//   2. Parameter getters are never called concurrently with the corresponding
114a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//      setter.
115a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
1162e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org// APM accepts only linear PCM audio data in chunks of 10 ms. The int16
1172e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org// interfaces use interleaved data, while the float interfaces use deinterleaved
1182e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org// data.
119a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
// Usage example, omitting error checking:
// AudioProcessing* apm = AudioProcessing::Create();
//
// apm->high_pass_filter()->Enable(true);
//
// apm->echo_cancellation()->enable_drift_compensation(false);
// apm->echo_cancellation()->Enable(true);
//
// apm->noise_suppression()->set_level(kHighSuppression);
// apm->noise_suppression()->Enable(true);
//
// apm->gain_control()->set_analog_level_limits(0, 255);
// apm->gain_control()->set_mode(kAdaptiveAnalog);
// apm->gain_control()->Enable(true);
//
// apm->voice_detection()->Enable(true);
//
// // Start a voice call...
//
// // ... Render frame arrives bound for the audio HAL ...
// apm->AnalyzeReverseStream(render_frame);
//
// // ... Capture frame arrives from the audio HAL ...
// // Call required set_stream_ functions.
// apm->set_stream_delay_ms(delay_ms);
// apm->gain_control()->set_stream_analog_level(analog_level);
//
// apm->ProcessStream(capture_frame);
//
// // Call required stream_ functions.
// analog_level = apm->gain_control()->stream_analog_level();
// has_voice = apm->voice_detection()->stream_has_voice();
//
// // Repeat render and capture processing for the duration of the call...
// // Start a new call...
// apm->Initialize();
//
// // Close the application...
// delete apm;
//
1606f8b05124cf17bd76ba3d623a1462d701b754cfbandrew@webrtc.orgclass AudioProcessing {
161a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org public:
1623c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org  enum ChannelLayout {
1633c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org    kMono,
1643c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org    // Left, right.
1653c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org    kStereo,
1663c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org    // Mono, keyboard mic.
1673c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org    kMonoAndKeyboard,
1683c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org    // Left, right, keyboard mic.
1693c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org    kStereoAndKeyboard
1703c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org  };
1713c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org
172680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org  // Creates an APM instance. Use one instance for every primary audio stream
173680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org  // requiring processing. On the client-side, this would typically be one
174680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org  // instance for the near-end stream, and additional instances for each far-end
175680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org  // stream which requires processing. On the server-side, this would typically
176680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org  // be one instance for every incoming stream.
1776a6e3ebebf71a4827d45a92e229c3d0f94e36c5dandrew@webrtc.org  static AudioProcessing* Create();
178680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org  // Allows passing in an optional configuration at create-time.
1796a6e3ebebf71a4827d45a92e229c3d0f94e36c5dandrew@webrtc.org  static AudioProcessing* Create(const Config& config);
1806a6e3ebebf71a4827d45a92e229c3d0f94e36c5dandrew@webrtc.org  // TODO(ajm): Deprecated; remove all calls to it.
181a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  static AudioProcessing* Create(int id);
182b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org  virtual ~AudioProcessing() {}
183a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
184a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Initializes internal states, while retaining all user settings. This
185a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // should be called before beginning to process a new audio stream. However,
186a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // it is not necessary to call before processing the first stream after
1872e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // creation.
1882e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  //
1892e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // It is also not necessary to call if the audio parameters (sample
190926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org  // rate and number of channels) have changed. Passing updated parameters
191926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org  // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible.
1922e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // If the parameters are known at init-time though, they may be provided.
193a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int Initialize() = 0;
1942e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org
1952e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // The int16 interfaces require:
1962e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  //   - only |NativeRate|s be used
1972e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  //   - that the input, output and reverse rates must match
1982e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  //   - that |output_layout| matches |input_layout|
1992e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  //
2002e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // The float interfaces accept arbitrary rates and support differing input
2012e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // and output layouts, but the output may only remove channels, not add.
2022e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  virtual int Initialize(int input_sample_rate_hz,
2032e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org                         int output_sample_rate_hz,
20439dd100c67a7e2cfbbe4e6025055684eab5be49dandrew@webrtc.org                         int reverse_sample_rate_hz,
2052e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org                         ChannelLayout input_layout,
2062e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org                         ChannelLayout output_layout,
2072e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org                         ChannelLayout reverse_layout) = 0;
208a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
209d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org  // Pass down additional options which don't have explicit setters. This
210d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org  // ensures the options are applied immediately.
211d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org  virtual void SetExtraOptions(const Config& config) = 0;
212d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org
2132e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // DEPRECATED.
2142e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // TODO(ajm): Remove after Chromium has upgraded to using Initialize().
215a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int set_sample_rate_hz(int rate) = 0;
2162e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // TODO(ajm): Remove after voice engine no longer requires it to resample
2172e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // the reverse stream to the forward rate.
2182e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  virtual int input_sample_rate_hz() const = 0;
2193ab5093256ac8cdcb65efda14f99934ceb164b70andrew@webrtc.org  // TODO(ajm): Remove after Chromium no longer depends on it.
2203ab5093256ac8cdcb65efda14f99934ceb164b70andrew@webrtc.org  virtual int sample_rate_hz() const = 0;
2212e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org
2222e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // TODO(ajm): Only intended for internal use. Make private and friend the
2232e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // necessary classes?
2242e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  virtual int proc_sample_rate_hz() const = 0;
2252e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  virtual int proc_split_sample_rate_hz() const = 0;
226a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int num_input_channels() const = 0;
227a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int num_output_channels() const = 0;
228a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int num_reverse_channels() const = 0;
229a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
230f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org  // Set to true when the output of AudioProcessing will be muted or in some
231f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org  // other way not used. Ideally, the captured audio would still be processed,
232f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org  // but some components may change behavior based on this information.
233f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org  // Default false.
234f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org  virtual void set_output_will_be_muted(bool muted) = 0;
235f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org  virtual bool output_will_be_muted() const = 0;
236f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org
237a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Processes a 10 ms |frame| of the primary audio stream. On the client-side,
238a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // this is the near-end (or captured) audio.
239a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
240a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // If needed for enabled functionality, any function with the set_stream_ tag
241a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // must be called prior to processing the current frame. Any getter function
242a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // with the stream_ tag which is needed should be called after processing.
243a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
244a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
245926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org  // members of |frame| must be valid. If changed from the previous call to this
246926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org  // method, it will trigger an initialization.
247a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int ProcessStream(AudioFrame* frame) = 0;
248a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
2493c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org  // Accepts deinterleaved float audio with the range [-1, 1]. Each element
2502e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // of |src| points to a channel buffer, arranged according to
2513c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org  // |input_layout|. At output, the channels will be arranged according to
2522e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // |output_layout| at |output_sample_rate_hz| in |dest|.
2532e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  //
2542e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // The output layout may only remove channels, not add. |src| and |dest|
2552e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // may use the same memory, if desired.
2562e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  virtual int ProcessStream(const float* const* src,
2573c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org                            int samples_per_channel,
2582e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org                            int input_sample_rate_hz,
2593c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org                            ChannelLayout input_layout,
2602e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org                            int output_sample_rate_hz,
2612e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org                            ChannelLayout output_layout,
2622e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org                            float* const* dest) = 0;
2633c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org
264a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
265a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // will not be modified. On the client-side, this is the far-end (or to be
266a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // rendered) audio.
267a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
268a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // It is only necessary to provide this if echo processing is enabled, as the
269a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // reverse stream forms the echo reference signal. It is recommended, but not
270a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // necessary, to provide if gain control is enabled. On the server-side this
271a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // typically will not be used. If you're not sure what to pass in here,
272a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // chances are you don't need to use it.
273a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
274a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
275926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org  // members of |frame| must be valid. |sample_rate_hz_| must correspond to
2762e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // |input_sample_rate_hz()|
277a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
278a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // TODO(ajm): add const to input; requires an implementation fix.
279a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
280a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
2813c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org  // Accepts deinterleaved float audio with the range [-1, 1]. Each element
2823c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org  // of |data| points to a channel buffer, arranged according to |layout|.
2833c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org  virtual int AnalyzeReverseStream(const float* const* data,
2843c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org                                   int samples_per_channel,
2853c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org                                   int sample_rate_hz,
2863c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org                                   ChannelLayout layout) = 0;
2873c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org
288a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // This must be called if and only if echo processing is enabled.
289a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
290a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
291a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // frame and ProcessStream() receiving a near-end frame containing the
292a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // corresponding echo. On the client-side this can be expressed as
293a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //   delay = (t_render - t_analyze) + (t_process - t_capture)
294a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // where,
295a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //   - t_analyze is the time a frame is passed to AnalyzeReverseStream() and
296a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //     t_render is the time the first sample of the same frame is rendered by
297a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //     the audio hardware.
298a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //   - t_capture is the time the first sample of a frame is captured by the
299a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //     audio hardware and t_pull is the time the same frame is passed to
300a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //     ProcessStream().
301a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int set_stream_delay_ms(int delay) = 0;
302a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int stream_delay_ms() const = 0;
303873f357ba460cd4786c40faa81d2607fc6b8b38fandrew@webrtc.org  virtual bool was_stream_delay_set() const = 0;
304a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
305247df83ab88df78aae6a58321a5d71dbf6a468e4andrew@webrtc.org  // Call to signal that a key press occurred (true) or did not occur (false)
306247df83ab88df78aae6a58321a5d71dbf6a468e4andrew@webrtc.org  // with this chunk of audio.
307247df83ab88df78aae6a58321a5d71dbf6a468e4andrew@webrtc.org  virtual void set_stream_key_pressed(bool key_pressed) = 0;
308247df83ab88df78aae6a58321a5d71dbf6a468e4andrew@webrtc.org  virtual bool stream_key_pressed() const = 0;
309247df83ab88df78aae6a58321a5d71dbf6a468e4andrew@webrtc.org
310a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Sets a delay |offset| in ms to add to the values passed in through
311a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // set_stream_delay_ms(). May be positive or negative.
312a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
313a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Note that this could cause an otherwise valid value passed to
314a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // set_stream_delay_ms() to return an error.
315a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual void set_delay_offset_ms(int offset) = 0;
316a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int delay_offset_ms() const = 0;
317a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
318a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Starts recording debugging information to a file specified by |filename|,
319a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // a NULL-terminated string. If there is an ongoing recording, the old file
320a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // will be closed, and recording will continue in the newly specified file.
321a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // An already existing file will be overwritten without warning.
322a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  static const size_t kMaxFilenameSize = 1024;
323a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int StartDebugRecording(const char filename[kMaxFilenameSize]) = 0;
324a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
325c8bd97520bca8aae52771986731cbdf30dc03252henrikg@webrtc.org  // Same as above but uses an existing file handle. Takes ownership
326c8bd97520bca8aae52771986731cbdf30dc03252henrikg@webrtc.org  // of |handle| and closes it at StopDebugRecording().
327c8bd97520bca8aae52771986731cbdf30dc03252henrikg@webrtc.org  virtual int StartDebugRecording(FILE* handle) = 0;
328c8bd97520bca8aae52771986731cbdf30dc03252henrikg@webrtc.org
32926a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org  // Same as above but uses an existing PlatformFile handle. Takes ownership
33026a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org  // of |handle| and closes it at StopDebugRecording().
33126a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org  // TODO(xians): Make this interface pure virtual.
33226a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org  virtual int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) {
33326a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org      return -1;
33426a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org  }
33526a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org
336a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Stops recording debugging information, and closes the file. Recording
337a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // cannot be resumed in the same file (without overwriting it).
338a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int StopDebugRecording() = 0;
339a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
340a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // These provide access to the component interfaces and should never return
341a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // NULL. The pointers will be valid for the lifetime of the APM instance.
342a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // The memory for these objects is entirely managed internally.
343a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual EchoCancellation* echo_cancellation() const = 0;
344a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual EchoControlMobile* echo_control_mobile() const = 0;
345a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual GainControl* gain_control() const = 0;
346a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual HighPassFilter* high_pass_filter() const = 0;
347a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual LevelEstimator* level_estimator() const = 0;
348a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual NoiseSuppression* noise_suppression() const = 0;
349a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual VoiceDetection* voice_detection() const = 0;
350a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
// A group of four integer readings describing one measured quantity: its
// current value together with its long-term average, maximum and minimum.
struct Statistic {
  int instant;  // Instantaneous value.
  int average;  // Long-term average.
  int maximum;  // Long-term maximum.
  int minimum;  // Long-term minimum.
};
357a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
// Status codes. kNoError is zero; every other code is negative. The codes
// are split into fatal errors and non-fatal warnings.
enum Error {
  // Fatal errors.
  kNoError = 0,
  kUnspecifiedError = -1,
  kCreationFailedError = -2,
  kUnsupportedComponentError = -3,
  kUnsupportedFunctionError = -4,
  kNullPointerError = -5,
  kBadParameterError = -6,
  kBadSampleRateError = -7,
  kBadDataLengthError = -8,
  kBadNumberChannelsError = -9,
  kFileError = -10,
  kStreamParameterNotSetError = -11,
  kNotEnabledError = -12,

  // Warnings are non-fatal.
  // This results when a set_stream_ parameter is out of range. Processing
  // will continue, but the parameter may have been truncated.
  kBadStreamParameterWarning = -13
};
379873f357ba460cd4786c40faa81d2607fc6b8b38fandrew@webrtc.org
// Sample rates, with each enumerator's value equal to the rate in Hz.
// NOTE(review): presumably the rates the module processes natively --
// confirm against the implementation.
enum NativeRate {
  kSampleRate8kHz = 8000,
  kSampleRate16kHz = 16000,
  kSampleRate32kHz = 32000
};
3852e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org
3862e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  static const int kChunkSizeMs = 10;
387a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org};
388a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
389a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// The acoustic echo cancellation (AEC) component provides better performance
390a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// than AECM but also requires more processing power and is dependent on delay
391a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// stability and reporting accuracy. As such it is well-suited and recommended
392a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// for PC and IP phone applications.
393a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
394a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// Not recommended to be enabled on the server-side.
395a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass EchoCancellation {
396a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org public:
397a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // EchoCancellation and EchoControlMobile may not be enabled simultaneously.
398a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Enabling one will disable the other.
399a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int Enable(bool enable) = 0;
400a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool is_enabled() const = 0;
401a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
402a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Differences in clock speed on the primary and reverse streams can impact
403a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // the AEC performance. On the client-side, this could be seen when different
404a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // render and capture devices are used, particularly with webcams.
405a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
406a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // This enables a compensation mechanism, and requires that
4072e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org  // set_stream_drift_samples() be called.
408a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int enable_drift_compensation(bool enable) = 0;
409a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool is_drift_compensation_enabled() const = 0;
410a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
411a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Sets the difference between the number of samples rendered and captured by
412a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // the audio devices since the last call to |ProcessStream()|. Must be called
413ad179ceb706f406b03e9871d1078301368a7fbcfandrew@webrtc.org  // if drift compensation is enabled, prior to |ProcessStream()|.
414ad179ceb706f406b03e9871d1078301368a7fbcfandrew@webrtc.org  virtual void set_stream_drift_samples(int drift) = 0;
415a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int stream_drift_samples() const = 0;
416a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
417a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  enum SuppressionLevel {
418a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kLowSuppression,
419a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kModerateSuppression,
420a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kHighSuppression
421a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  };
422a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
423a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Sets the aggressiveness of the suppressor. A higher level trades off
424a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // double-talk performance for increased echo suppression.
425a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int set_suppression_level(SuppressionLevel level) = 0;
426a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual SuppressionLevel suppression_level() const = 0;
427a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
428a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Returns false if the current frame almost certainly contains no echo
429a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // and true if it _might_ contain echo.
430a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool stream_has_echo() const = 0;
431a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
432a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Enables the computation of various echo metrics. These are obtained
433a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // through |GetMetrics()|.
434a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int enable_metrics(bool enable) = 0;
435a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool are_metrics_enabled() const = 0;
436a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
437a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Each statistic is reported in dB.
438a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // P_far:  Far-end (render) signal power.
439a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // P_echo: Near-end (capture) echo signal power.
440a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // P_out:  Signal power at the output of the AEC.
441a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // P_a:    Internal signal power at the point before the AEC's non-linear
442a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //         processor.
443a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  struct Metrics {
444a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    // RERL = ERL + ERLE
445a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    AudioProcessing::Statistic residual_echo_return_loss;
446a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
447a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    // ERL = 10log_10(P_far / P_echo)
448a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    AudioProcessing::Statistic echo_return_loss;
449a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
450a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    // ERLE = 10log_10(P_echo / P_out)
451a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    AudioProcessing::Statistic echo_return_loss_enhancement;
452a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
453a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a)
454a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    AudioProcessing::Statistic a_nlp;
455a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  };
456a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
457a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // TODO(ajm): discuss the metrics update period.
458a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int GetMetrics(Metrics* metrics) = 0;
459a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
460a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Enables computation and logging of delay values. Statistics are obtained
461a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // through |GetDelayMetrics()|.
462a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int enable_delay_logging(bool enable) = 0;
463a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool is_delay_logging_enabled() const = 0;
464a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
465a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // The delay metrics consists of the delay |median| and the delay standard
466a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // deviation |std|. The values are averaged over the time period since the
467a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // last call to |GetDelayMetrics()|.
468a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int GetDelayMetrics(int* median, int* std) = 0;
469a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
470333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.org  // Returns a pointer to the low level AEC component.  In case of multiple
471333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.org  // channels, the pointer to the first one is returned.  A NULL pointer is
472333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.org  // returned when the AEC component is disabled or has not been initialized
473333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.org  // successfully.
474333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.org  virtual struct AecCore* aec_core() const = 0;
475333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.org
476a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org protected:
477b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org  virtual ~EchoCancellation() {}
478a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org};
479a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
// The acoustic echo control for mobile (AECM) component: a low complexity,
// robust option intended for use on mobile devices.
//
// Not recommended to be enabled on the server-side.
class EchoControlMobile {
 public:
  // Enables or disables the component. EchoCancellation and EchoControlMobile
  // may not be enabled simultaneously; enabling one will disable the other.
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Recommended settings for particular audio routes. In general, the louder
  // the echo is expected to be, the higher this value should be set. The
  // preferred setting may vary from device to device.
  enum RoutingMode {
    kQuietEarpieceOrHeadset,
    kEarpiece,
    kLoudEarpiece,
    kSpeakerphone,
    kLoudSpeakerphone
  };

  // Sets echo control appropriate for the audio routing |mode| on the device.
  // It can and should be updated during a call if the audio routing changes.
  virtual int set_routing_mode(RoutingMode mode) = 0;
  virtual RoutingMode routing_mode() const = 0;

  // Comfort noise replaces suppressed background noise to maintain a
  // consistent signal level.
  virtual int enable_comfort_noise(bool enable) = 0;
  virtual bool is_comfort_noise_enabled() const = 0;

  // Stores (|SetEchoPath()|) or retrieves (|GetEchoPath()|) the echo path.
  //
  // A typical use case is to initialize the component with an echo path from a
  // previous call: the path is retrieved with |GetEchoPath()|, typically at
  // the end of a call, stored, and then passed to |SetEchoPath()| as an
  // initializer before the next call.
  //
  // The data |size_bytes| must match the internal echo path size, which can be
  // acquired using |echo_path_size_bytes()|. Note that |SetEchoPath()| causes
  // an entire reset, which matters if it is called during an ongoing call.
  //
  // It is possible that version incompatibilities may result in a stored echo
  // path of the incorrect size. In this case, the stored path should be
  // discarded.
  virtual int SetEchoPath(const void* echo_path, size_t size_bytes) = 0;
  virtual int GetEchoPath(void* echo_path, size_t size_bytes) const = 0;

  // Returns the echo path size in bytes. The returned size is guaranteed not
  // to change for the lifetime of the application.
  static size_t echo_path_size_bytes();

 protected:
  // Protected so that the component cannot be deleted through this interface.
  virtual ~EchoControlMobile() {}
};
535a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
// The automatic gain control (AGC) component brings the signal to an
// appropriate range. It does so by applying a digital gain directly and, in
// the analog mode, by prescribing an analog gain to be applied at the audio
// HAL.
//
// Recommended to be enabled on the client-side.
class GainControl {
 public:
  // Enables or disables the component.
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // When an analog mode is set, this must be called prior to |ProcessStream()|
  // to pass the current analog level from the audio HAL. Must be within the
  // range provided to |set_analog_level_limits()|.
  virtual int set_stream_analog_level(int level) = 0;

  // When an analog mode is set, this should be called after |ProcessStream()|
  // to obtain the recommended new analog level for the audio HAL. It is the
  // user's responsibility to apply this level.
  virtual int stream_analog_level() = 0;

  enum Mode {
    // Adaptive mode intended for use if an analog volume control is available
    // on the capture device. It will require the user to provide coupling
    // between the OS mixer controls and AGC through the |stream_analog_level()|
    // functions.
    //
    // It consists of an analog gain prescription for the audio device and a
    // digital compression stage.
    kAdaptiveAnalog,

    // Adaptive mode intended for situations in which an analog volume control
    // is unavailable. It operates in a similar fashion to the adaptive analog
    // mode, but with scaling instead applied in the digital domain. As with
    // the analog mode, it additionally uses a digital compression stage.
    kAdaptiveDigital,

    // Fixed mode which enables only the digital compression stage also used by
    // the two adaptive modes.
    //
    // It is distinguished from the adaptive modes by considering only a
    // short time-window of the input signal. It applies a fixed gain through
    // most of the input level range, and compresses (gradually reduces gain
    // with increasing level) the input signal at higher levels. This mode is
    // preferred on embedded devices where the capture signal level is
    // predictable, so that a known gain can be applied.
    kFixedDigital
  };

  // Selects the operating |mode| of the AGC; see Mode for the alternatives.
  virtual int set_mode(Mode mode) = 0;
  virtual Mode mode() const = 0;

  // Sets the target peak |level| (or envelope) of the AGC in dBFS (decibels
  // from digital full-scale). The convention is to use positive values. For
  // instance, passing in a value of 3 corresponds to -3 dBFS, or a target
  // level 3 dB below full-scale. Limited to [0, 31].
  //
  // TODO(ajm): use a negative value here instead, if/when VoE will similarly
  //            update its interface.
  virtual int set_target_level_dbfs(int level) = 0;
  virtual int target_level_dbfs() const = 0;

  // Sets the maximum |gain| the digital compression stage may apply, in dB. A
  // higher number corresponds to greater compression, while a value of 0 will
  // leave the signal uncompressed. Limited to [0, 90].
  virtual int set_compression_gain_db(int gain) = 0;
  virtual int compression_gain_db() const = 0;

  // When enabled, the compression stage will hard limit the signal to the
  // target level. Otherwise, the signal will be compressed but not limited
  // above the target level.
  virtual int enable_limiter(bool enable) = 0;
  virtual bool is_limiter_enabled() const = 0;

  // Sets the |minimum| and |maximum| analog levels of the audio capture device.
  // Must be set if and only if an analog mode is used. Limited to [0, 65535].
  virtual int set_analog_level_limits(int minimum, int maximum) = 0;
  virtual int analog_level_minimum() const = 0;
  virtual int analog_level_maximum() const = 0;

  // Returns true if the AGC has detected a saturation event (period where the
  // signal reaches digital full-scale) in the current frame and the analog
  // level cannot be reduced.
  //
  // This could be used as an indicator to reduce or disable analog mic gain at
  // the audio HAL.
  virtual bool stream_is_saturated() const = 0;

 protected:
  // Protected so that the component cannot be deleted through this interface.
  virtual ~GainControl() {}
};
627a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
// A filtering component which removes DC offset and low-frequency noise.
// Recommended to be enabled on the client-side.
class HighPassFilter {
 public:
  // Enables or disables the filter.
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

 protected:
  // Protected so that the component cannot be deleted through this interface.
  virtual ~HighPassFilter() {}
};
638a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
639a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// An estimation component used to retrieve level metrics.
//
// This is a pure interface: every operation below is pure virtual.
640a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass LevelEstimator {
641a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org public:
  // Requests that the estimator be turned on (|enable| is true) or off.
  // NOTE(review): the int result is presumably an APM error code; confirm
  // against the implementation.
642a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int Enable(bool enable) = 0;
  // Query counterpart of Enable(); does not modify the object (const).
643a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool is_enabled() const = 0;
644a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
645a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Returns the root mean square (RMS) level in dBFS (decibels from digital
646a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // full-scale), or alternately dBov. It is computed over all primary stream
647a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // frames since the last call to RMS(). The returned value is positive but
648a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // should be interpreted as negative. It is constrained to [0, 127].
  // For example, a return value of 30 stands for a level of -30 dBFS.
649a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
6508ec46c6eaba40f28c2296b729df316dec132f0b0andrew@webrtc.org  // The computation follows: https://tools.ietf.org/html/rfc6465
651a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // with the intent that it can provide the RTP audio level indication.
652a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
653a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Frames passed to ProcessStream() with an |_energy| of zero are considered
654a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // to have been muted. The RMS of the frame will be interpreted as -127.
  //
  // Unlike the other accessors this method is not const. NOTE(review): that
  // is consistent with each call starting a new measurement window, as the
  // "since the last call" wording suggests; confirm in the implementation.
655a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int RMS() = 0;
656a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
657a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org protected:
  // Protected so that outside code cannot delete the component through a
  // LevelEstimator pointer.
658b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org  virtual ~LevelEstimator() {}
659a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org};
660a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
661a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// The noise suppression (NS) component attempts to remove noise while
662a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// retaining speech. Recommended to be enabled on the client-side.
663a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
664a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// The aggressiveness of the suppression is selected with |set_level()|.
665a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass NoiseSuppression {
666a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org public:
  // Requests that noise suppression be turned on (|enable| is true) or off.
  // NOTE(review): the int result is presumably an APM error code; confirm
  // against the implementation.
667a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int Enable(bool enable) = 0;
  // Query counterpart of Enable(); does not modify the object (const).
668a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool is_enabled() const = 0;
669a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
670a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Determines the aggressiveness of the suppression. Increasing the level
671a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // will reduce the noise level at the expense of a higher speech distortion.
  // Judging by their names, the enumerators are declared from least (kLow) to
  // most (kVeryHigh) aggressive. No explicit values are assigned, so they
  // number 0 through 3 in declaration order.
672a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  enum Level {
673a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kLow,
674a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kModerate,
675a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kHigh,
676a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kVeryHigh
677a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  };
678a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
  // Setter and getter for the suppression level described above.
  // NOTE(review): the int result of the setter is presumably an APM error
  // code; confirm against the implementation.
679a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int set_level(Level level) = 0;
680a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual Level level() const = 0;
681a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
682a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Returns the internally computed prior speech probability of current frame
683a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // averaged over output channels. This is not supported in fixed point, for
684a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // which |kUnsupportedFunctionError| is returned.
  //
  // NOTE(review): the return type is float, so the error code mentioned above
  // would reach the caller converted to a float rather than as a separate
  // status. Confirm how callers are expected to tell it apart from a
  // probability.
685a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual float speech_probability() const = 0;
686a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
687a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org protected:
  // Protected so that outside code cannot delete the component through a
  // NoiseSuppression pointer.
688b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org  virtual ~NoiseSuppression() {}
689a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org};
690a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
691a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// The voice activity detection (VAD) component analyzes the stream to
692a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// determine if voice is present. A facility is also provided to pass in an
693a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// external VAD decision.
694a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org//
695a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// In addition to |stream_has_voice()| the VAD decision is provided through the
696a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will be
697a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// modified to reflect the current decision.
698a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass VoiceDetection {
699a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org public:
  // Requests that voice detection be turned on (|enable| is true) or off.
  // NOTE(review): the int result is presumably an APM error code; confirm
  // against the implementation.
700a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int Enable(bool enable) = 0;
  // Query counterpart of Enable(); does not modify the object (const).
701a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool is_enabled() const = 0;
702a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
703a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Returns true if voice is detected in the current frame. Should be called
704a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // after |ProcessStream()|.
705a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual bool stream_has_voice() const = 0;
706a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
707a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Some of the APM functionality requires a VAD decision. In the case that
708a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // a decision is externally available for the current frame, it can be passed
709a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // in here, before |ProcessStream()| is called.
710a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
711a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // VoiceDetection does _not_ need to be enabled to use this. If it happens to
712a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // be enabled, detection will be skipped for any frame in which an external
713a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // VAD decision is provided.
714a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int set_stream_has_voice(bool has_voice) = 0;
715a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
716a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Specifies the likelihood that a frame will be declared to contain voice.
717a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // A higher value makes it more likely that speech will not be clipped, at
718a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // the expense of more noise being detected as voice.
  // No explicit values are assigned, so the enumerators number 0 through 3 in
  // declaration order, from kVeryLowLikelihood to kHighLikelihood.
719a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  enum Likelihood {
720a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kVeryLowLikelihood,
721a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kLowLikelihood,
722a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kModerateLikelihood,
723a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org    kHighLikelihood
724a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  };
725a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
  // Setter and getter for the detection likelihood described above.
  // NOTE(review): the int result of the setter is presumably an APM error
  // code; confirm against the implementation.
726a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int set_likelihood(Likelihood likelihood) = 0;
727a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual Likelihood likelihood() const = 0;
728a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
729a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // Sets the |size| of the frames in ms on which the VAD will operate. Larger
730a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // frames will improve detection accuracy, but reduce the frequency of
731a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // updates.
732a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  //
733a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  // This does not impact the size of frames passed to |ProcessStream()|.
  //
  // NOTE(review): the set of accepted sizes is not stated in this header;
  // check the implementation before passing an arbitrary value.
734a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int set_frame_size_ms(int size) = 0;
735a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org  virtual int frame_size_ms() const = 0;
736a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
737a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org protected:
  // Protected so that outside code cannot delete the component through a
  // VoiceDetection pointer.
738b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org  virtual ~VoiceDetection() {}
739a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org};
740a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org}  // namespace webrtc
741a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org
742b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
743