1a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org/* 2a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org * 4a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org * Use of this source code is governed by a BSD-style license 5a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org * that can be found in the LICENSE file in the root of the source 6a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org * tree. An additional intellectual property rights grant can be found 7a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org * in the file PATENTS. All contributing project authors may 8a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org * be found in the AUTHORS file in the root of the source tree. 9a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org */ 10a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 11b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ 12b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#define WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ 13a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 14b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#include <stddef.h> // size_t 15c8bd97520bca8aae52771986731cbdf30dc03252henrikg@webrtc.org#include <stdio.h> // FILE 16a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 1726a0c4c9568f9e616e9e9fa8652911ddd1f1f70atnakamura@webrtc.org#include "webrtc/base/platform_file.h" 18d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org#include "webrtc/common.h" 19b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#include "webrtc/typedefs.h" 20a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 21333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.orgstruct AecCore; 22333987b84d079d79ec9aa43c747c46dd59e641ddbjornv@webrtc.org 
23a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgnamespace webrtc { 24a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 25a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass AudioFrame; 26a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass EchoCancellation; 27a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass EchoControlMobile; 28a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass GainControl; 29a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass HighPassFilter; 30a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass LevelEstimator; 31a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass NoiseSuppression; 32a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass VoiceDetection; 33a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 34870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// Use to enable the delay correction feature. This now engages an extended 35870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// filter mode in the AEC, along with robustness measures around the reported 36870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// system delays. It comes with a significant increase in AEC complexity, but is 37870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// much more robust to unreliable reported delays. 38870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// 39870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// Detailed changes to the algorithm: 40870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// - The filter length is changed from 48 to 128 ms. This comes with tuning of 41870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// several parameters: i) filter adaptation stepsize and error threshold; 42870404d75203d20ce358bc78a59df32163284a8fandrew@webrtc.org// ii) non-linear processing smoothing and overdrive. 
// - Option to ignore the reported delays on platforms which we deem
//   sufficiently unreliable. See WEBRTC_UNTRUSTED_DELAY in echo_cancellation.c.
// - Faster startup times by removing the excessive "startup phase" processing
//   of reported delays.
// - Much more conservative adjustments to the far-end read pointer. We smooth
//   the delay difference more heavily, and back off from the difference more.
//   Adjustments force a readaptation of the filter, so they should be avoided
//   except when really necessary.
//
// |enabled| defaults to false when default-constructed.
struct DelayCorrection {
  DelayCorrection() : enabled(false) {}
  // Explicit so that a bare bool is never silently converted into an option.
  explicit DelayCorrection(bool enabled) : enabled(enabled) {}
  bool enabled;
};

// Use to disable the reported system delays. By disabling the reported system
// delays the echo cancellation algorithm assumes the process and reverse
// streams to be aligned.
// This configuration only applies to EchoCancellation
// and not EchoControlMobile and is set with AudioProcessing::SetExtraOptions().
// Note that by disabling reported system delays the EchoCancellation may
// regress in performance.
//
// |enabled| defaults to true when default-constructed.
struct ReportedDelay {
  ReportedDelay() : enabled(true) {}
  // Explicit so that a bare bool is never silently converted into an option.
  explicit ReportedDelay(bool enabled) : enabled(enabled) {}
  bool enabled;
};

// Must be provided through AudioProcessing::Create(Config&). It will have no
// impact if used with AudioProcessing::SetExtraOptions().
//
// |enabled| defaults to true when default-constructed.
struct ExperimentalAgc {
  ExperimentalAgc() : enabled(true) {}
  // Explicit so that a bare bool is never silently converted into an option.
  explicit ExperimentalAgc(bool enabled) : enabled(enabled) {}
  bool enabled;
};

// Use to enable experimental noise suppression. It can be set in the
// constructor or using AudioProcessing::SetExtraOptions().
// |enabled| defaults to false when ExperimentalNs is default-constructed.
struct ExperimentalNs {
  ExperimentalNs() : enabled(false) {}
  // Explicit so that a bare bool is never silently converted into an option.
  explicit ExperimentalNs(bool enabled) : enabled(enabled) {}
  bool enabled;
};

// NOTE(review): presumably the highest sample rate (in Hz) that APM treats as
// "native" -- confirm against the implementation.
static const int kAudioProcMaxNativeSampleRateHz = 32000;

// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//
// APM operates on two audio streams on a frame-by-frame basis. Frames of the
// primary stream, on which all processing is applied, are passed to
// |ProcessStream()|. Frames of the reverse direction stream, which are used for
// analysis by some components, are passed to |AnalyzeReverseStream()|. On the
// client-side, this will typically be the near-end (capture) and far-end
// (render) streams, respectively. APM should be placed in the signal chain as
// close to the audio hardware abstraction layer (HAL) as possible.
//
// On the server-side, the reverse stream will normally not be used, with
// processing occurring on each incoming stream.
//
// Component interfaces follow a similar pattern and are accessed through
// corresponding getters in APM. All components are disabled at create-time,
// with default settings that are recommended for most situations. New settings
// can be applied without enabling a component. Enabling a component triggers
// memory allocation and initialization to allow it to start processing the
// streams.
//
// Thread safety is provided with the following assumptions to reduce locking
// overhead:
//   1. The stream getters and setters are called from the same thread as
//      ProcessStream(). More precisely, stream functions are never called
//      concurrently with ProcessStream().
//   2. Parameter getters are never called concurrently with the corresponding
//      setter.
//
// APM accepts only linear PCM audio data in chunks of 10 ms. The int16
// interfaces use interleaved data, while the float interfaces use deinterleaved
// data.
//
// Usage example, omitting error checking:
// AudioProcessing* apm = AudioProcessing::Create(0);
//
// apm->high_pass_filter()->Enable(true);
//
// apm->echo_cancellation()->enable_drift_compensation(false);
// apm->echo_cancellation()->Enable(true);
//
// apm->noise_suppression()->set_level(kHighSuppression);
// apm->noise_suppression()->Enable(true);
//
// apm->gain_control()->set_analog_level_limits(0, 255);
// apm->gain_control()->set_mode(kAdaptiveAnalog);
// apm->gain_control()->Enable(true);
//
// apm->voice_detection()->Enable(true);
//
// // Start a voice call...
//
// // ... Render frame arrives bound for the audio HAL ...
// apm->AnalyzeReverseStream(render_frame);
//
// // ... Capture frame arrives from the audio HAL ...
// // Call required set_stream_ functions.
// apm->set_stream_delay_ms(delay_ms);
// apm->gain_control()->set_stream_analog_level(analog_level);
//
// apm->ProcessStream(capture_frame);
//
// // Call required stream_ functions.
// analog_level = apm->gain_control()->stream_analog_level();
// has_voice = apm->stream_has_voice();
//
// // Repeat render and capture processing for the duration of the call...
// // Start a new call...
// apm->Initialize();
//
// // Close the application...
158bee99b186be691f09915e368e1361274db797f57andrew@webrtc.org// delete apm; 159a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// 1606f8b05124cf17bd76ba3d623a1462d701b754cfbandrew@webrtc.orgclass AudioProcessing { 161a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org public: 1623c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org enum ChannelLayout { 1633c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org kMono, 1643c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org // Left, right. 1653c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org kStereo, 1663c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org // Mono, keyboard mic. 1673c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org kMonoAndKeyboard, 1683c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org // Left, right, keyboard mic. 1693c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org kStereoAndKeyboard 1703c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org }; 1713c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org 172680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org // Creates an APM instance. Use one instance for every primary audio stream 173680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org // requiring processing. On the client-side, this would typically be one 174680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org // instance for the near-end stream, and additional instances for each far-end 175680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org // stream which requires processing. On the server-side, this would typically 176680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org // be one instance for every incoming stream. 1776a6e3ebebf71a4827d45a92e229c3d0f94e36c5dandrew@webrtc.org static AudioProcessing* Create(); 178680d3ca6040dd57db60c6b3d8f053124f31d66ffandrew@webrtc.org // Allows passing in an optional configuration at create-time. 
1796a6e3ebebf71a4827d45a92e229c3d0f94e36c5dandrew@webrtc.org static AudioProcessing* Create(const Config& config); 1806a6e3ebebf71a4827d45a92e229c3d0f94e36c5dandrew@webrtc.org // TODO(ajm): Deprecated; remove all calls to it. 181a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org static AudioProcessing* Create(int id); 182b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org virtual ~AudioProcessing() {} 183a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 184a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // Initializes internal states, while retaining all user settings. This 185a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // should be called before beginning to process a new audio stream. However, 186a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // it is not necessary to call before processing the first stream after 1872e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // creation. 1882e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // 1892e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // It is also not necessary to call if the audio parameters (sample 190926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org // rate and number of channels) have changed. Passing updated parameters 191926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. 1922e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // If the parameters are known at init-time though, they may be provided. 
193a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int Initialize() = 0; 1942e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org 1952e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // The int16 interfaces require: 1962e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // - only |NativeRate|s be used 1972e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // - that the input, output and reverse rates must match 1982e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // - that |output_layout| matches |input_layout| 1992e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // 2002e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // The float interfaces accept arbitrary rates and support differing input 2012e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // and output layouts, but the output may only remove channels, not add. 2022e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org virtual int Initialize(int input_sample_rate_hz, 2032e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org int output_sample_rate_hz, 20439dd100c67a7e2cfbbe4e6025055684eab5be49dandrew@webrtc.org int reverse_sample_rate_hz, 2052e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org ChannelLayout input_layout, 2062e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org ChannelLayout output_layout, 2072e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org ChannelLayout reverse_layout) = 0; 208a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 209d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org // Pass down additional options which don't have explicit setters. This 210d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org // ensures the options are applied immediately. 
211d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org virtual void SetExtraOptions(const Config& config) = 0; 212d13f24b7dc75a319dbc608a20e2c982b12418b2aandrew@webrtc.org 2132e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // DEPRECATED. 2142e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // TODO(ajm): Remove after Chromium has upgraded to using Initialize(). 215a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int set_sample_rate_hz(int rate) = 0; 2162e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // TODO(ajm): Remove after voice engine no longer requires it to resample 2172e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // the reverse stream to the forward rate. 2182e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org virtual int input_sample_rate_hz() const = 0; 2193ab5093256ac8cdcb65efda14f99934ceb164b70andrew@webrtc.org // TODO(ajm): Remove after Chromium no longer depends on it. 2203ab5093256ac8cdcb65efda14f99934ceb164b70andrew@webrtc.org virtual int sample_rate_hz() const = 0; 2212e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org 2222e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // TODO(ajm): Only intended for internal use. Make private and friend the 2232e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // necessary classes? 
2242e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org virtual int proc_sample_rate_hz() const = 0; 2252e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org virtual int proc_split_sample_rate_hz() const = 0; 226a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int num_input_channels() const = 0; 227a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int num_output_channels() const = 0; 228a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int num_reverse_channels() const = 0; 229a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 230f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org // Set to true when the output of AudioProcessing will be muted or in some 231f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org // other way not used. Ideally, the captured audio would still be processed, 232f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org // but some components may change behavior based on this information. 233f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org // Default false. 234f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org virtual void set_output_will_be_muted(bool muted) = 0; 235f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org virtual bool output_will_be_muted() const = 0; 236f4f1d1aee1d58983fa4462a521f2130376b1ee86andrew@webrtc.org 237a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // Processes a 10 ms |frame| of the primary audio stream. On the client-side, 238a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // this is the near-end (or captured) audio. 239a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // 240a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // If needed for enabled functionality, any function with the set_stream_ tag 241a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // must be called prior to processing the current frame. 
Any getter function 242a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // with the stream_ tag which is needed should be called after processing. 243a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // 244a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 245926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org // members of |frame| must be valid. If changed from the previous call to this 246926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org // method, it will trigger an initialization. 247a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int ProcessStream(AudioFrame* frame) = 0; 248a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 2493c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org // Accepts deinterleaved float audio with the range [-1, 1]. Each element 2502e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // of |src| points to a channel buffer, arranged according to 2513c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org // |input_layout|. At output, the channels will be arranged according to 2522e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // |output_layout| at |output_sample_rate_hz| in |dest|. 2532e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // 2542e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // The output layout may only remove channels, not add. |src| and |dest| 2552e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // may use the same memory, if desired. 
2562e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org virtual int ProcessStream(const float* const* src, 2573c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org int samples_per_channel, 2582e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org int input_sample_rate_hz, 2593c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org ChannelLayout input_layout, 2602e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org int output_sample_rate_hz, 2612e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org ChannelLayout output_layout, 2622e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org float* const* dest) = 0; 2633c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org 264a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame 265a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // will not be modified. On the client-side, this is the far-end (or to be 266a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // rendered) audio. 267a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // 268a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // It is only necessary to provide this if echo processing is enabled, as the 269a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // reverse stream forms the echo reference signal. It is recommended, but not 270a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // necessary, to provide if gain control is enabled. On the server-side this 271a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // typically will not be used. If you're not sure what to pass in here, 272a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // chances are you don't need to use it. 
273a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // 274a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 275926e88a57614f8b90bbf14f74371991df3041cf1andrew@webrtc.org // members of |frame| must be valid. |sample_rate_hz_| must correspond to 2762e244605324aa2c5183ee2bf332cc10606d27fd8andrew@webrtc.org // |input_sample_rate_hz()| 277a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // 278a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // TODO(ajm): add const to input; requires an implementation fix. 279a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; 280a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 2813c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org // Accepts deinterleaved float audio with the range [-1, 1]. Each element 2823c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org // of |data| points to a channel buffer, arranged according to |layout|. 2833c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org virtual int AnalyzeReverseStream(const float* const* data, 2843c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org int samples_per_channel, 2853c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org int sample_rate_hz, 2863c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org ChannelLayout layout) = 0; 2873c5112c3f633f8d16d0ab05a34422ea24ecf4274andrew@webrtc.org 288a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // This must be called if and only if echo processing is enabled. 
  //
  // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
  // frame and ProcessStream() receiving a near-end frame containing the
  // corresponding echo. On the client-side this can be expressed as
  //   delay = (t_render - t_analyze) + (t_process - t_capture)
  // where,
  //   - t_analyze is the time a frame is passed to AnalyzeReverseStream() and
  //     t_render is the time the first sample of the same frame is rendered by
  //     the audio hardware.
  //   - t_capture is the time the first sample of a frame is captured by the
  //     audio hardware and t_process is the time the same frame is passed to
  //     ProcessStream().
  virtual int set_stream_delay_ms(int delay) = 0;
  virtual int stream_delay_ms() const = 0;
  virtual bool was_stream_delay_set() const = 0;

  // Call to signal that a key press occurred (true) or did not occur (false)
  // with this chunk of audio.
  virtual void set_stream_key_pressed(bool key_pressed) = 0;
  virtual bool stream_key_pressed() const = 0;

  // Sets a delay |offset| in ms to add to the values passed in through
  // set_stream_delay_ms(). May be positive or negative.
  //
  // Note that this could cause an otherwise valid value passed to
  // set_stream_delay_ms() to return an error.
  virtual void set_delay_offset_ms(int offset) = 0;
  virtual int delay_offset_ms() const = 0;

  // Starts recording debugging information to a file specified by |filename|,
  // a NULL-terminated string. If there is an ongoing recording, the old file
  // will be closed, and recording will continue in the newly specified file.
  // An already existing file will be overwritten without warning.
  //
  // |kMaxFilenameSize| is the array bound used in the |filename| parameter
  // declaration.
  static const size_t kMaxFilenameSize = 1024;
  virtual int StartDebugRecording(const char filename[kMaxFilenameSize]) = 0;

  // Same as above but uses an existing file handle. Takes ownership
  // of |handle| and closes it at StopDebugRecording().
  virtual int StartDebugRecording(FILE* handle) = 0;

  // Same as above but uses an existing PlatformFile handle. Takes ownership
  // of |handle| and closes it at StopDebugRecording().
  //
  // This default implementation ignores |handle| and returns -1 (numerically
  // equal to kUnspecifiedError).
  // TODO(xians): Make this interface pure virtual.
  virtual int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) {
      return -1;
  }

  // Stops recording debugging information, and closes the file. Recording
  // cannot be resumed in the same file (without overwriting it).
  virtual int StopDebugRecording() = 0;

  // These provide access to the component interfaces and should never return
  // NULL. The pointers will be valid for the lifetime of the APM instance.
  // The memory for these objects is entirely managed internally.
  virtual EchoCancellation* echo_cancellation() const = 0;
  virtual EchoControlMobile* echo_control_mobile() const = 0;
  virtual GainControl* gain_control() const = 0;
  virtual HighPassFilter* high_pass_filter() const = 0;
  virtual LevelEstimator* level_estimator() const = 0;
  virtual NoiseSuppression* noise_suppression() const = 0;
  virtual VoiceDetection* voice_detection() const = 0;

  // A group of four related readings of a single quantity.
  struct Statistic {
    int instant;  // Instantaneous value.
    int average;  // Long-term average.
    int maximum;  // Long-term maximum.
    int minimum;  // Long-term minimum.
  };

  enum Error {
    // Fatal errors.
    kNoError = 0,
    kUnspecifiedError = -1,
    kCreationFailedError = -2,
    kUnsupportedComponentError = -3,
    kUnsupportedFunctionError = -4,
    kNullPointerError = -5,
    kBadParameterError = -6,
    kBadSampleRateError = -7,
    kBadDataLengthError = -8,
    kBadNumberChannelsError = -9,
    kFileError = -10,
    kStreamParameterNotSetError = -11,
    kNotEnabledError = -12,

    // Warnings are non-fatal.
    // This results when a set_stream_ parameter is out of range. Processing
    // will continue, but the parameter may have been truncated.
    kBadStreamParameterWarning = -13
  };

  // Sample rate constants; each enumerator's value is the rate in Hz.
  enum NativeRate {
    kSampleRate8kHz = 8000,
    kSampleRate16kHz = 16000,
    kSampleRate32kHz = 32000
  };

  static const int kChunkSizeMs = 10;
};

// The acoustic echo cancellation (AEC) component provides better performance
// than AECM but also requires more processing power and is dependent on delay
// stability and reporting accuracy. As such it is well-suited and recommended
// for PC and IP phone applications.
//
// Not recommended to be enabled on the server-side.
class EchoCancellation {
 public:
  // EchoCancellation and EchoControlMobile may not be enabled simultaneously.
  // Enabling one will disable the other.
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Differences in clock speed on the primary and reverse streams can impact
  // the AEC performance. On the client-side, this could be seen when different
  // render and capture devices are used, particularly with webcams.
  //
  // This enables a compensation mechanism, and requires that
  // set_stream_drift_samples() be called.
  virtual int enable_drift_compensation(bool enable) = 0;
  virtual bool is_drift_compensation_enabled() const = 0;

  // Sets the difference between the number of samples rendered and captured by
  // the audio devices since the last call to |ProcessStream()|. Must be called
  // if drift compensation is enabled, prior to |ProcessStream()|.
  virtual void set_stream_drift_samples(int drift) = 0;
  virtual int stream_drift_samples() const = 0;

  enum SuppressionLevel {
    kLowSuppression,
    kModerateSuppression,
    kHighSuppression
  };

  // Sets the aggressiveness of the suppressor. A higher level trades off
  // double-talk performance for increased echo suppression.
  virtual int set_suppression_level(SuppressionLevel level) = 0;
  virtual SuppressionLevel suppression_level() const = 0;

  // Returns false if the current frame almost certainly contains no echo
  // and true if it _might_ contain echo.
  virtual bool stream_has_echo() const = 0;

  // Enables the computation of various echo metrics. These are obtained
  // through |GetMetrics()|.
  virtual int enable_metrics(bool enable) = 0;
  virtual bool are_metrics_enabled() const = 0;

  // Each statistic is reported in dB.
  // P_far:  Far-end (render) signal power.
  // P_echo: Near-end (capture) echo signal power.
  // P_out:  Signal power at the output of the AEC.
  // P_a:    Internal signal power at the point before the AEC's non-linear
  //         processor.
  struct Metrics {
    // RERL = ERL + ERLE
    AudioProcessing::Statistic residual_echo_return_loss;

    // ERL = 10log_10(P_far / P_echo)
    AudioProcessing::Statistic echo_return_loss;

    // ERLE = 10log_10(P_echo / P_out)
    AudioProcessing::Statistic echo_return_loss_enhancement;

    // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a)
    AudioProcessing::Statistic a_nlp;
  };

  // Writes the current echo metrics to |metrics|.
  // TODO(ajm): discuss the metrics update period.
  virtual int GetMetrics(Metrics* metrics) = 0;

  // Enables computation and logging of delay values. Statistics are obtained
  // through |GetDelayMetrics()|.
  virtual int enable_delay_logging(bool enable) = 0;
  virtual bool is_delay_logging_enabled() const = 0;

  // The delay metrics consist of the delay |median| and the delay standard
  // deviation |std|. The values are averaged over the time period since the
  // last call to |GetDelayMetrics()|.
  virtual int GetDelayMetrics(int* median, int* std) = 0;

  // Returns a pointer to the low level AEC component.  In case of multiple
  // channels, the pointer to the first one is returned.  A NULL pointer is
  // returned when the AEC component is disabled or has not been initialized
  // successfully.
  virtual struct AecCore* aec_core() const = 0;

 protected:
  virtual ~EchoCancellation() {}
};

// The acoustic echo control for mobile (AECM) component is a low complexity
// robust option intended for use on mobile devices.
//
// Not recommended to be enabled on the server-side.
class EchoControlMobile {
 public:
  // EchoCancellation and EchoControlMobile may not be enabled simultaneously.
  // Enabling one will disable the other.
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Recommended settings for particular audio routes. In general, the louder
  // the echo is expected to be, the higher this value should be set. The
  // preferred setting may vary from device to device.
  enum RoutingMode {
    kQuietEarpieceOrHeadset,
    kEarpiece,
    kLoudEarpiece,
    kSpeakerphone,
    kLoudSpeakerphone
  };

  // Sets echo control appropriate for the audio routing |mode| on the device.
  // It can and should be updated during a call if the audio routing changes.
  virtual int set_routing_mode(RoutingMode mode) = 0;
  virtual RoutingMode routing_mode() const = 0;

  // Comfort noise replaces suppressed background noise to maintain a
  // consistent signal level.
  virtual int enable_comfort_noise(bool enable) = 0;
  virtual bool is_comfort_noise_enabled() const = 0;

  // A typical use case is to initialize the component with an echo path from a
  // previous call. The echo path is retrieved using |GetEchoPath()|, typically
  // at the end of a call. The data can then be stored for later use as an
  // initializer before the next call, using |SetEchoPath()|.
  //
  // Controlling the echo path this way requires the data |size_bytes| to match
  // the internal echo path size. This size can be acquired using
  // |echo_path_size_bytes()|. |SetEchoPath()| causes an entire reset, worth
  // noting if it is to be called during an ongoing call.
  //
  // It is possible that version incompatibilities may result in a stored echo
  // path of the incorrect size. In this case, the stored path should be
  // discarded.
  virtual int SetEchoPath(const void* echo_path, size_t size_bytes) = 0;
  virtual int GetEchoPath(void* echo_path, size_t size_bytes) const = 0;

  // The returned path size is guaranteed not to change for the lifetime of
  // the application.
  static size_t echo_path_size_bytes();

 protected:
  virtual ~EchoControlMobile() {}
};

// The automatic gain control (AGC) component brings the signal to an
// appropriate range. This is done by applying a digital gain directly and, in
// the analog mode, prescribing an analog gain to be applied at the audio HAL.
//
// Recommended to be enabled on the client-side.
class GainControl {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // When an analog mode is set, this must be called prior to |ProcessStream()|
  // to pass the current analog level from the audio HAL. Must be within the
  // range provided to |set_analog_level_limits()|.
  virtual int set_stream_analog_level(int level) = 0;

  // When an analog mode is set, this should be called after |ProcessStream()|
  // to obtain the recommended new analog level for the audio HAL. It is the
  // user's responsibility to apply this level.
  virtual int stream_analog_level() = 0;

  enum Mode {
    // Adaptive mode intended for use if an analog volume control is available
    // on the capture device. It will require the user to provide coupling
    // between the OS mixer controls and AGC through the |stream_analog_level()|
    // functions.
    //
    // It consists of an analog gain prescription for the audio device and a
    // digital compression stage.
    kAdaptiveAnalog,

    // Adaptive mode intended for situations in which an analog volume control
    // is unavailable. It operates in a similar fashion to the adaptive analog
    // mode, but with scaling instead applied in the digital domain. As with
    // the analog mode, it additionally uses a digital compression stage.
    kAdaptiveDigital,

    // Fixed mode which enables only the digital compression stage also used by
    // the two adaptive modes.
    //
    // It is distinguished from the adaptive modes by considering only a
    // short time-window of the input signal. It applies a fixed gain through
    // most of the input level range, and compresses (gradually reduces gain
    // with increasing level) the input signal at higher levels. This mode is
    // preferred on embedded devices where the capture signal level is
    // predictable, so that a known gain can be applied.
    kFixedDigital
  };

  virtual int set_mode(Mode mode) = 0;
  virtual Mode mode() const = 0;

  // Sets the target peak |level| (or envelope) of the AGC in dBFS (decibels
  // from digital full-scale). The convention is to use positive values. For
  // instance, passing in a value of 3 corresponds to -3 dBFS, or a target
  // level 3 dB below full-scale. Limited to [0, 31].
  //
  // TODO(ajm): use a negative value here instead, if/when VoE will similarly
  //            update its interface.
  virtual int set_target_level_dbfs(int level) = 0;
  virtual int target_level_dbfs() const = 0;

  // Sets the maximum |gain| the digital compression stage may apply, in dB. A
  // higher number corresponds to greater compression, while a value of 0 will
  // leave the signal uncompressed. Limited to [0, 90].
  virtual int set_compression_gain_db(int gain) = 0;
  virtual int compression_gain_db() const = 0;

  // When enabled, the compression stage will hard limit the signal to the
  // target level. Otherwise, the signal will be compressed but not limited
  // above the target level.
  virtual int enable_limiter(bool enable) = 0;
  virtual bool is_limiter_enabled() const = 0;

  // Sets the |minimum| and |maximum| analog levels of the audio capture device.
  // Must be set if and only if an analog mode is used. Limited to [0, 65535].
  virtual int set_analog_level_limits(int minimum,
                                      int maximum) = 0;
  virtual int analog_level_minimum() const = 0;
  virtual int analog_level_maximum() const = 0;

  // Returns true if the AGC has detected a saturation event (period where the
  // signal reaches digital full-scale) in the current frame and the analog
  // level cannot be reduced.
  //
  // This could be used as an indicator to reduce or disable analog mic gain at
  // the audio HAL.
  virtual bool stream_is_saturated() const = 0;

 protected:
  virtual ~GainControl() {}
};

// A filtering component which removes DC offset and low-frequency noise.
// Recommended to be enabled on the client-side.
class HighPassFilter {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

 protected:
  virtual ~HighPassFilter() {}
};

// An estimation component used to retrieve level metrics.
class LevelEstimator {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Returns the root mean square (RMS) level in dBFS (decibels from digital
  // full-scale), or alternately dBov. It is computed over all primary stream
  // frames since the last call to RMS(). The returned value is positive but
  // should be interpreted as negative. It is constrained to [0, 127].
  //
  // The computation follows: https://tools.ietf.org/html/rfc6465
  // with the intent that it can provide the RTP audio level indication.
  //
  // Frames passed to ProcessStream() with an |_energy| of zero are considered
  // to have been muted. The RMS of the frame will be interpreted as -127.
  virtual int RMS() = 0;

 protected:
  virtual ~LevelEstimator() {}
};

// The noise suppression (NS) component attempts to remove noise while
// retaining speech.
//
// Recommended to be enabled on the client-side.
class NoiseSuppression {
 public:
  virtual int Enable(bool enable) = 0;
  virtual bool is_enabled() const = 0;

  // Determines the aggressiveness of the suppression. Increasing the level
  // will reduce the noise level at the expense of a higher speech distortion.
672a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org enum Level { 673a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org kLow, 674a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org kModerate, 675a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org kHigh, 676a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org kVeryHigh 677a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org }; 678a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 679a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int set_level(Level level) = 0; 680a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual Level level() const = 0; 681a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 682a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // Returns the internally computed prior speech probability of current frame 683a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // averaged over output channels. This is not supported in fixed point, for 684a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // which |kUnsupportedFunctionError| is returned. 685a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual float speech_probability() const = 0; 686a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 687a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org protected: 688b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org virtual ~NoiseSuppression() {} 689a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org}; 690a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 691a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// The voice activity detection (VAD) component analyzes the stream to 692a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// determine if voice is present. A facility is also provided to pass in an 693a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// external VAD decision. 
694a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// 695a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// In addition to |stream_has_voice()| the VAD decision is provided through the 696a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will be 697a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org// modified to reflect the current decision. 698a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.orgclass VoiceDetection { 699a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org public: 700a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int Enable(bool enable) = 0; 701a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual bool is_enabled() const = 0; 702a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 703a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // Returns true if voice is detected in the current frame. Should be called 704a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // after |ProcessStream()|. 705a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual bool stream_has_voice() const = 0; 706a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 707a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // Some of the APM functionality requires a VAD decision. In the case that 708a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // a decision is externally available for the current frame, it can be passed 709a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // in here, before |ProcessStream()| is called. 710a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // 711a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // VoiceDetection does _not_ need to be enabled to use this. 
If it happens to 712a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // be enabled, detection will be skipped for any frame in which an external 713a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // VAD decision is provided. 714a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int set_stream_has_voice(bool has_voice) = 0; 715a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 716a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // Specifies the likelihood that a frame will be declared to contain voice. 717a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // A higher value makes it more likely that speech will not be clipped, at 718a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // the expense of more noise being detected as voice. 719a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org enum Likelihood { 720a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org kVeryLowLikelihood, 721a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org kLowLikelihood, 722a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org kModerateLikelihood, 723a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org kHighLikelihood 724a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org }; 725a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 726a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int set_likelihood(Likelihood likelihood) = 0; 727a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual Likelihood likelihood() const = 0; 728a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 729a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // Sets the |size| of the frames in ms on which the VAD will operate. Larger 730a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // frames will improve detection accuracy, but reduce the frequency of 731a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // updates. 
732a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // 733a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org // This does not impact the size of frames passed to |ProcessStream()|. 734a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int set_frame_size_ms(int size) = 0; 735a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org virtual int frame_size_ms() const = 0; 736a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 737a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org protected: 738b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org virtual ~VoiceDetection() {} 739a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org}; 740a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org} // namespace webrtc 741a7b57da12f9863cdb36b81c4bf2c5cc48e91974andrew@webrtc.org 742b9247754e6a9b877fcde2ff9504f6bcd2527cef8andrew@webrtc.org#endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ 743