1030249dd247444687663c4969ff078dc0a4b24acekm/* 2030249dd247444687663c4969ff078dc0a4b24acekm * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3030249dd247444687663c4969ff078dc0a4b24acekm * 4030249dd247444687663c4969ff078dc0a4b24acekm * Use of this source code is governed by a BSD-style license 5030249dd247444687663c4969ff078dc0a4b24acekm * that can be found in the LICENSE file in the root of the source 6030249dd247444687663c4969ff078dc0a4b24acekm * tree. An additional intellectual property rights grant can be found 7030249dd247444687663c4969ff078dc0a4b24acekm * in the file PATENTS. All contributing project authors may 8030249dd247444687663c4969ff078dc0a4b24acekm * be found in the AUTHORS file in the root of the source tree. 9030249dd247444687663c4969ff078dc0a4b24acekm */ 10030249dd247444687663c4969ff078dc0a4b24acekm 11db4fecfb01ac51e936e4b7496a4929e713080f07ekm// 12db4fecfb01ac51e936e4b7496a4929e713080f07ekm// Implements core class for intelligibility enhancer. 13db4fecfb01ac51e936e4b7496a4929e713080f07ekm// 14db4fecfb01ac51e936e4b7496a4929e713080f07ekm// Details of the model and algorithm can be found in the original paper: 15db4fecfb01ac51e936e4b7496a4929e713080f07ekm// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788 16db4fecfb01ac51e936e4b7496a4929e713080f07ekm// 17db4fecfb01ac51e936e4b7496a4929e713080f07ekm 18030249dd247444687663c4969ff078dc0a4b24acekm#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" 19030249dd247444687663c4969ff078dc0a4b24acekm 2035b72fbceb09031cbd6039e0dbbd44ed24296509ekm#include <math.h> 2135b72fbceb09031cbd6039e0dbbd44ed24296509ekm#include <stdlib.h> 22030249dd247444687663c4969ff078dc0a4b24acekm#include <algorithm> 23db4fecfb01ac51e936e4b7496a4929e713080f07ekm#include <numeric> 24030249dd247444687663c4969ff078dc0a4b24acekm 25030249dd247444687663c4969ff078dc0a4b24acekm#include "webrtc/base/checks.h" 2660d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson#include "webrtc/common_audio/include/audio_util.h" 27030249dd247444687663c4969ff078dc0a4b24acekm#include "webrtc/common_audio/window_generator.h" 28030249dd247444687663c4969ff078dc0a4b24acekm 29030249dd247444687663c4969ff078dc0a4b24acekmnamespace webrtc { 30030249dd247444687663c4969ff078dc0a4b24acekm 3135b72fbceb09031cbd6039e0dbbd44ed24296509ekmnamespace { 32db4fecfb01ac51e936e4b7496a4929e713080f07ekm 33dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kastingconst size_t kErbResolution = 2; 3435b72fbceb09031cbd6039e0dbbd44ed24296509ekmconst int kWindowSizeMs = 2; 3535b72fbceb09031cbd6039e0dbbd44ed24296509ekmconst int kChunkSizeMs = 10; // Size provided by APM. 3635b72fbceb09031cbd6039e0dbbd44ed24296509ekmconst float kClipFreq = 200.0f; 3735b72fbceb09031cbd6039e0dbbd44ed24296509ekmconst float kConfigRho = 0.02f; // Default production and interpretation SNR. 3835b72fbceb09031cbd6039e0dbbd44ed24296509ekmconst float kKbdAlpha = 1.5f; 3935b72fbceb09031cbd6039e0dbbd44ed24296509ekmconst float kLambdaBot = -1.0f; // Extreme values in bisection 4035b72fbceb09031cbd6039e0dbbd44ed24296509ekmconst float kLambdaTop = -10e-18f; // search for lamda. 41030249dd247444687663c4969ff078dc0a4b24acekm 4235b72fbceb09031cbd6039e0dbbd44ed24296509ekm} // namespace 4335b72fbceb09031cbd6039e0dbbd44ed24296509ekm 4435b72fbceb09031cbd6039e0dbbd44ed24296509ekmusing std::complex; 4535b72fbceb09031cbd6039e0dbbd44ed24296509ekmusing std::max; 4635b72fbceb09031cbd6039e0dbbd44ed24296509ekmusing std::min; 47030249dd247444687663c4969ff078dc0a4b24acekmusing VarianceType = intelligibility::VarianceArray::StepType; 48030249dd247444687663c4969ff078dc0a4b24acekm 49030249dd247444687663c4969ff078dc0a4b24acekmIntelligibilityEnhancer::TransformCallback::TransformCallback( 50030249dd247444687663c4969ff078dc0a4b24acekm IntelligibilityEnhancer* parent, 51030249dd247444687663c4969ff078dc0a4b24acekm IntelligibilityEnhancer::AudioSource source) 52db4fecfb01ac51e936e4b7496a4929e713080f07ekm : parent_(parent), source_(source) { 53db4fecfb01ac51e936e4b7496a4929e713080f07ekm} 54030249dd247444687663c4969ff078dc0a4b24acekm 55030249dd247444687663c4969ff078dc0a4b24acekmvoid IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( 56030249dd247444687663c4969ff078dc0a4b24acekm const complex<float>* const* in_block, 576955870806624479723addfae6dcf5d13968796cPeter Kasting size_t in_channels, 58dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t frames, 596955870806624479723addfae6dcf5d13968796cPeter Kasting size_t /* out_channels */, 60030249dd247444687663c4969ff078dc0a4b24acekm complex<float>* const* out_block) { 6191d6edef35e7275879c30ce16ecb8b6dc73c6e4ahenrikg RTC_DCHECK_EQ(parent_->freqs_, frames); 626955870806624479723addfae6dcf5d13968796cPeter Kasting for (size_t i = 0; i < in_channels; ++i) { 63030249dd247444687663c4969ff078dc0a4b24acekm parent_->DispatchAudio(source_, in_block[i], out_block[i]); 64030249dd247444687663c4969ff078dc0a4b24acekm } 65030249dd247444687663c4969ff078dc0a4b24acekm} 66030249dd247444687663c4969ff078dc0a4b24acekm 6760d9b332a5391045439bfb6a3a5447973e3d5603ekmeyersonIntelligibilityEnhancer::IntelligibilityEnhancer() 6860d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) { 6960d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson} 7060d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson 7160d9b332a5391045439bfb6a3a5447973e3d5603ekmeyersonIntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) 72db4fecfb01ac51e936e4b7496a4929e713080f07ekm : freqs_(RealFourier::ComplexLength( 7360d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))), 74dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))), 75dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting chunk_length_( 76dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)), 7760d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)), 7860d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson sample_rate_hz_(config.sample_rate_hz), 7960d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson erb_resolution_(kErbResolution), 8060d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson num_capture_channels_(config.num_capture_channels), 8160d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson num_render_channels_(config.num_render_channels), 8260d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson analysis_rate_(config.analysis_rate), 8360d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson active_(true), 84db4fecfb01ac51e936e4b7496a4929e713080f07ekm clear_variance_(freqs_, 8560d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson config.var_type, 8660d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson config.var_window_size, 8760d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson config.var_decay_rate), 8860d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson noise_variance_(freqs_, 8960d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson config.var_type, 9060d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson config.var_window_size, 9160d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson config.var_decay_rate), 92030249dd247444687663c4969ff078dc0a4b24acekm filtered_clear_var_(new float[bank_size_]), 93030249dd247444687663c4969ff078dc0a4b24acekm filtered_noise_var_(new float[bank_size_]), 9435b72fbceb09031cbd6039e0dbbd44ed24296509ekm filter_bank_(bank_size_), 95030249dd247444687663c4969ff078dc0a4b24acekm center_freqs_(new float[bank_size_]), 96030249dd247444687663c4969ff078dc0a4b24acekm rho_(new float[bank_size_]), 97030249dd247444687663c4969ff078dc0a4b24acekm gains_eq_(new float[bank_size_]), 9860d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson gain_applier_(freqs_, config.gain_change_limit), 9960d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson temp_render_out_buffer_(chunk_length_, num_render_channels_), 10060d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson temp_capture_out_buffer_(chunk_length_, num_capture_channels_), 101030249dd247444687663c4969ff078dc0a4b24acekm kbd_window_(new float[window_size_]), 102030249dd247444687663c4969ff078dc0a4b24acekm render_callback_(this, AudioSource::kRenderStream), 103030249dd247444687663c4969ff078dc0a4b24acekm capture_callback_(this, AudioSource::kCaptureStream), 104030249dd247444687663c4969ff078dc0a4b24acekm block_count_(0), 10560d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson analysis_step_(0) { 10691d6edef35e7275879c30ce16ecb8b6dc73c6e4ahenrikg RTC_DCHECK_LE(config.rho, 1.0f); 107030249dd247444687663c4969ff078dc0a4b24acekm 108030249dd247444687663c4969ff078dc0a4b24acekm CreateErbBank(); 109030249dd247444687663c4969ff078dc0a4b24acekm 110db4fecfb01ac51e936e4b7496a4929e713080f07ekm // Assumes all rho equal. 111dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 0; i < bank_size_; ++i) { 11260d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson rho_[i] = config.rho * config.rho; 113030249dd247444687663c4969ff078dc0a4b24acekm } 114030249dd247444687663c4969ff078dc0a4b24acekm 115030249dd247444687663c4969ff078dc0a4b24acekm float freqs_khz = kClipFreq / 1000.0f; 116dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t erb_index = static_cast<size_t>(ceilf( 117db4fecfb01ac51e936e4b7496a4929e713080f07ekm 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); 118dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_); 119030249dd247444687663c4969ff078dc0a4b24acekm 120030249dd247444687663c4969ff078dc0a4b24acekm WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, 121030249dd247444687663c4969ff078dc0a4b24acekm kbd_window_.get()); 122db4fecfb01ac51e936e4b7496a4929e713080f07ekm render_mangler_.reset(new LappedTransform( 12360d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson num_render_channels_, num_render_channels_, chunk_length_, 12460d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); 125db4fecfb01ac51e936e4b7496a4929e713080f07ekm capture_mangler_.reset(new LappedTransform( 12660d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson num_capture_channels_, num_capture_channels_, chunk_length_, 12760d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_)); 128030249dd247444687663c4969ff078dc0a4b24acekm} 129030249dd247444687663c4969ff078dc0a4b24acekm 13060d9b332a5391045439bfb6a3a5447973e3d5603ekmeyersonvoid IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, 13160d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson int sample_rate_hz, 1326955870806624479723addfae6dcf5d13968796cPeter Kasting size_t num_channels) { 13391d6edef35e7275879c30ce16ecb8b6dc73c6e4ahenrikg RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); 13491d6edef35e7275879c30ce16ecb8b6dc73c6e4ahenrikg RTC_CHECK_EQ(num_render_channels_, num_channels); 135030249dd247444687663c4969ff078dc0a4b24acekm 13660d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson if (active_) { 13760d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels()); 138030249dd247444687663c4969ff078dc0a4b24acekm } 139030249dd247444687663c4969ff078dc0a4b24acekm 14060d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson if (active_) { 1416955870806624479723addfae6dcf5d13968796cPeter Kasting for (size_t i = 0; i < num_render_channels_; ++i) { 14260d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson memcpy(audio[i], temp_render_out_buffer_.channels()[i], 14360d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson chunk_length_ * sizeof(**audio)); 14460d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson } 145030249dd247444687663c4969ff078dc0a4b24acekm } 146030249dd247444687663c4969ff078dc0a4b24acekm} 147030249dd247444687663c4969ff078dc0a4b24acekm 14860d9b332a5391045439bfb6a3a5447973e3d5603ekmeyersonvoid IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio, 14960d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson int sample_rate_hz, 1506955870806624479723addfae6dcf5d13968796cPeter Kasting size_t num_channels) { 15191d6edef35e7275879c30ce16ecb8b6dc73c6e4ahenrikg RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); 15291d6edef35e7275879c30ce16ecb8b6dc73c6e4ahenrikg RTC_CHECK_EQ(num_capture_channels_, num_channels); 153db4fecfb01ac51e936e4b7496a4929e713080f07ekm 15460d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels()); 155030249dd247444687663c4969ff078dc0a4b24acekm} 156030249dd247444687663c4969ff078dc0a4b24acekm 157030249dd247444687663c4969ff078dc0a4b24acekmvoid IntelligibilityEnhancer::DispatchAudio( 158030249dd247444687663c4969ff078dc0a4b24acekm IntelligibilityEnhancer::AudioSource source, 159db4fecfb01ac51e936e4b7496a4929e713080f07ekm const complex<float>* in_block, 160db4fecfb01ac51e936e4b7496a4929e713080f07ekm complex<float>* out_block) { 161030249dd247444687663c4969ff078dc0a4b24acekm switch (source) { 162030249dd247444687663c4969ff078dc0a4b24acekm case kRenderStream: 163030249dd247444687663c4969ff078dc0a4b24acekm ProcessClearBlock(in_block, out_block); 164030249dd247444687663c4969ff078dc0a4b24acekm break; 165030249dd247444687663c4969ff078dc0a4b24acekm case kCaptureStream: 166030249dd247444687663c4969ff078dc0a4b24acekm ProcessNoiseBlock(in_block, out_block); 167030249dd247444687663c4969ff078dc0a4b24acekm break; 168030249dd247444687663c4969ff078dc0a4b24acekm } 169030249dd247444687663c4969ff078dc0a4b24acekm} 170030249dd247444687663c4969ff078dc0a4b24acekm 171030249dd247444687663c4969ff078dc0a4b24acekmvoid IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block, 172030249dd247444687663c4969ff078dc0a4b24acekm complex<float>* out_block) { 173030249dd247444687663c4969ff078dc0a4b24acekm if (block_count_ < 2) { 174030249dd247444687663c4969ff078dc0a4b24acekm memset(out_block, 0, freqs_ * sizeof(*out_block)); 175030249dd247444687663c4969ff078dc0a4b24acekm ++block_count_; 176030249dd247444687663c4969ff078dc0a4b24acekm return; 177030249dd247444687663c4969ff078dc0a4b24acekm } 178030249dd247444687663c4969ff078dc0a4b24acekm 17960d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson // TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary. 18060d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson if (true) { 181030249dd247444687663c4969ff078dc0a4b24acekm clear_variance_.Step(in_block, false); 182030249dd247444687663c4969ff078dc0a4b24acekm if (block_count_ % analysis_rate_ == analysis_rate_ - 1) { 18360d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson const float power_target = std::accumulate( 18460d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f); 185030249dd247444687663c4969ff078dc0a4b24acekm AnalyzeClearBlock(power_target); 186030249dd247444687663c4969ff078dc0a4b24acekm ++analysis_step_; 187030249dd247444687663c4969ff078dc0a4b24acekm } 188030249dd247444687663c4969ff078dc0a4b24acekm ++block_count_; 189030249dd247444687663c4969ff078dc0a4b24acekm } 190030249dd247444687663c4969ff078dc0a4b24acekm 19160d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson if (active_) { 19260d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson gain_applier_.Apply(in_block, out_block); 19360d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson } 194030249dd247444687663c4969ff078dc0a4b24acekm} 195030249dd247444687663c4969ff078dc0a4b24acekm 196030249dd247444687663c4969ff078dc0a4b24acekmvoid IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) { 197030249dd247444687663c4969ff078dc0a4b24acekm FilterVariance(clear_variance_.variance(), filtered_clear_var_.get()); 198030249dd247444687663c4969ff078dc0a4b24acekm FilterVariance(noise_variance_.variance(), filtered_noise_var_.get()); 199030249dd247444687663c4969ff078dc0a4b24acekm 20035b72fbceb09031cbd6039e0dbbd44ed24296509ekm SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); 20135b72fbceb09031cbd6039e0dbbd44ed24296509ekm const float power_top = 202db4fecfb01ac51e936e4b7496a4929e713080f07ekm DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); 20335b72fbceb09031cbd6039e0dbbd44ed24296509ekm SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get()); 20435b72fbceb09031cbd6039e0dbbd44ed24296509ekm const float power_bot = 205db4fecfb01ac51e936e4b7496a4929e713080f07ekm DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); 20635b72fbceb09031cbd6039e0dbbd44ed24296509ekm if (power_target >= power_bot && power_target <= power_top) { 20735b72fbceb09031cbd6039e0dbbd44ed24296509ekm SolveForLambda(power_target, power_bot, power_top); 20835b72fbceb09031cbd6039e0dbbd44ed24296509ekm UpdateErbGains(); 20935b72fbceb09031cbd6039e0dbbd44ed24296509ekm } // Else experiencing variance underflow, so do nothing. 21035b72fbceb09031cbd6039e0dbbd44ed24296509ekm} 211030249dd247444687663c4969ff078dc0a4b24acekm 21235b72fbceb09031cbd6039e0dbbd44ed24296509ekmvoid IntelligibilityEnhancer::SolveForLambda(float power_target, 21335b72fbceb09031cbd6039e0dbbd44ed24296509ekm float power_bot, 21435b72fbceb09031cbd6039e0dbbd44ed24296509ekm float power_top) { 215db4fecfb01ac51e936e4b7496a4929e713080f07ekm const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values 216db4fecfb01ac51e936e4b7496a4929e713080f07ekm const int kMaxIters = 100; // for these, based on experiments. 21735b72fbceb09031cbd6039e0dbbd44ed24296509ekm 21835b72fbceb09031cbd6039e0dbbd44ed24296509ekm const float reciprocal_power_target = 1.f / power_target; 21935b72fbceb09031cbd6039e0dbbd44ed24296509ekm float lambda_bot = kLambdaBot; 22035b72fbceb09031cbd6039e0dbbd44ed24296509ekm float lambda_top = kLambdaTop; 22135b72fbceb09031cbd6039e0dbbd44ed24296509ekm float power_ratio = 2.0f; // Ratio of achieved power to target power. 222030249dd247444687663c4969ff078dc0a4b24acekm int iters = 0; 22335b72fbceb09031cbd6039e0dbbd44ed24296509ekm while (std::fabs(power_ratio - 1.0f) > kConvergeThresh && 22435b72fbceb09031cbd6039e0dbbd44ed24296509ekm iters <= kMaxIters) { 22535b72fbceb09031cbd6039e0dbbd44ed24296509ekm const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f; 226db4fecfb01ac51e936e4b7496a4929e713080f07ekm SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get()); 22735b72fbceb09031cbd6039e0dbbd44ed24296509ekm const float power = 22835b72fbceb09031cbd6039e0dbbd44ed24296509ekm DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); 229030249dd247444687663c4969ff078dc0a4b24acekm if (power < power_target) { 230030249dd247444687663c4969ff078dc0a4b24acekm lambda_bot = lambda; 231030249dd247444687663c4969ff078dc0a4b24acekm } else { 232030249dd247444687663c4969ff078dc0a4b24acekm lambda_top = lambda; 233030249dd247444687663c4969ff078dc0a4b24acekm } 23435b72fbceb09031cbd6039e0dbbd44ed24296509ekm power_ratio = std::fabs(power * reciprocal_power_target); 235030249dd247444687663c4969ff078dc0a4b24acekm ++iters; 236030249dd247444687663c4969ff078dc0a4b24acekm } 23735b72fbceb09031cbd6039e0dbbd44ed24296509ekm} 238030249dd247444687663c4969ff078dc0a4b24acekm 23935b72fbceb09031cbd6039e0dbbd44ed24296509ekmvoid IntelligibilityEnhancer::UpdateErbGains() { 240db4fecfb01ac51e936e4b7496a4929e713080f07ekm // (ERB gain) = filterbank' * (freq gain) 241030249dd247444687663c4969ff078dc0a4b24acekm float* gains = gain_applier_.target(); 242dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 0; i < freqs_; ++i) { 243030249dd247444687663c4969ff078dc0a4b24acekm gains[i] = 0.0f; 244dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t j = 0; j < bank_size_; ++j) { 245030249dd247444687663c4969ff078dc0a4b24acekm gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); 246030249dd247444687663c4969ff078dc0a4b24acekm } 247030249dd247444687663c4969ff078dc0a4b24acekm } 248030249dd247444687663c4969ff078dc0a4b24acekm} 249030249dd247444687663c4969ff078dc0a4b24acekm 250030249dd247444687663c4969ff078dc0a4b24acekmvoid IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block, 251030249dd247444687663c4969ff078dc0a4b24acekm complex<float>* /*out_block*/) { 252030249dd247444687663c4969ff078dc0a4b24acekm noise_variance_.Step(in_block); 253030249dd247444687663c4969ff078dc0a4b24acekm} 254030249dd247444687663c4969ff078dc0a4b24acekm 255dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kastingsize_t IntelligibilityEnhancer::GetBankSize(int sample_rate, 256dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t erb_resolution) { 257030249dd247444687663c4969ff078dc0a4b24acekm float freq_limit = sample_rate / 2000.0f; 258dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t erb_scale = static_cast<size_t>(ceilf( 259dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f)); 260030249dd247444687663c4969ff078dc0a4b24acekm return erb_scale * erb_resolution; 261030249dd247444687663c4969ff078dc0a4b24acekm} 262030249dd247444687663c4969ff078dc0a4b24acekm 263030249dd247444687663c4969ff078dc0a4b24acekmvoid IntelligibilityEnhancer::CreateErbBank() { 264dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t lf = 1, rf = 4; 265030249dd247444687663c4969ff078dc0a4b24acekm 266dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 0; i < bank_size_; ++i) { 267030249dd247444687663c4969ff078dc0a4b24acekm float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_)); 268030249dd247444687663c4969ff078dc0a4b24acekm center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); 269030249dd247444687663c4969ff078dc0a4b24acekm center_freqs_[i] -= 14678.49f; 270030249dd247444687663c4969ff078dc0a4b24acekm } 271030249dd247444687663c4969ff078dc0a4b24acekm float last_center_freq = center_freqs_[bank_size_ - 1]; 272dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 0; i < bank_size_; ++i) { 273030249dd247444687663c4969ff078dc0a4b24acekm center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; 274030249dd247444687663c4969ff078dc0a4b24acekm } 275030249dd247444687663c4969ff078dc0a4b24acekm 276dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 0; i < bank_size_; ++i) { 27735b72fbceb09031cbd6039e0dbbd44ed24296509ekm filter_bank_[i].resize(freqs_); 278030249dd247444687663c4969ff078dc0a4b24acekm } 279030249dd247444687663c4969ff078dc0a4b24acekm 280dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 1; i <= bank_size_; ++i) { 281dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t lll, ll, rr, rrr; 282dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting static const size_t kOne = 1; // Avoids repeated static_cast<>s below. 283dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting lll = static_cast<size_t>(round( 284dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting center_freqs_[max(kOne, i - lf) - 1] * freqs_ / 285dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting (0.5f * sample_rate_hz_))); 286dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting ll = static_cast<size_t>(round( 287dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_))); 288dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting lll = min(freqs_, max(lll, kOne)) - 1; 289dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting ll = min(freqs_, max(ll, kOne)) - 1; 290dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting 291dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting rrr = static_cast<size_t>(round( 292dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / 293dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting (0.5f * sample_rate_hz_))); 294dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting rr = static_cast<size_t>(round( 295dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / 296dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting (0.5f * sample_rate_hz_))); 297dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting rrr = min(freqs_, max(rrr, kOne)) - 1; 298dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting rr = min(freqs_, max(rr, kOne)) - 1; 299030249dd247444687663c4969ff078dc0a4b24acekm 300030249dd247444687663c4969ff078dc0a4b24acekm float step, element; 301030249dd247444687663c4969ff078dc0a4b24acekm 302030249dd247444687663c4969ff078dc0a4b24acekm step = 1.0f / (ll - lll); 303030249dd247444687663c4969ff078dc0a4b24acekm element = 0.0f; 304dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t j = lll; j <= ll; ++j) { 305030249dd247444687663c4969ff078dc0a4b24acekm filter_bank_[i - 1][j] = element; 306030249dd247444687663c4969ff078dc0a4b24acekm element += step; 307030249dd247444687663c4969ff078dc0a4b24acekm } 308030249dd247444687663c4969ff078dc0a4b24acekm step = 1.0f / (rrr - rr); 309030249dd247444687663c4969ff078dc0a4b24acekm element = 1.0f; 310dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t j = rr; j <= rrr; ++j) { 311030249dd247444687663c4969ff078dc0a4b24acekm filter_bank_[i - 1][j] = element; 312030249dd247444687663c4969ff078dc0a4b24acekm element -= step; 313030249dd247444687663c4969ff078dc0a4b24acekm } 314dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t j = ll; j <= rr; ++j) { 315030249dd247444687663c4969ff078dc0a4b24acekm filter_bank_[i - 1][j] = 1.0f; 316030249dd247444687663c4969ff078dc0a4b24acekm } 317030249dd247444687663c4969ff078dc0a4b24acekm } 318030249dd247444687663c4969ff078dc0a4b24acekm 319030249dd247444687663c4969ff078dc0a4b24acekm float sum; 320dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 0; i < freqs_; ++i) { 321030249dd247444687663c4969ff078dc0a4b24acekm sum = 0.0f; 322dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t j = 0; j < bank_size_; ++j) { 323030249dd247444687663c4969ff078dc0a4b24acekm sum += filter_bank_[j][i]; 324030249dd247444687663c4969ff078dc0a4b24acekm } 325dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t j = 0; j < bank_size_; ++j) { 326030249dd247444687663c4969ff078dc0a4b24acekm filter_bank_[j][i] /= sum; 327030249dd247444687663c4969ff078dc0a4b24acekm } 328030249dd247444687663c4969ff078dc0a4b24acekm } 329030249dd247444687663c4969ff078dc0a4b24acekm} 330030249dd247444687663c4969ff078dc0a4b24acekm 331db4fecfb01ac51e936e4b7496a4929e713080f07ekmvoid IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, 332dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t start_freq, 333db4fecfb01ac51e936e4b7496a4929e713080f07ekm float* sols) { 334030249dd247444687663c4969ff078dc0a4b24acekm bool quadratic = (kConfigRho < 1.0f); 335030249dd247444687663c4969ff078dc0a4b24acekm const float* var_x0 = filtered_clear_var_.get(); 336030249dd247444687663c4969ff078dc0a4b24acekm const float* var_n0 = filtered_noise_var_.get(); 337030249dd247444687663c4969ff078dc0a4b24acekm 338dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t n = 0; n < start_freq; ++n) { 339030249dd247444687663c4969ff078dc0a4b24acekm sols[n] = 1.0f; 340030249dd247444687663c4969ff078dc0a4b24acekm } 341db4fecfb01ac51e936e4b7496a4929e713080f07ekm 342db4fecfb01ac51e936e4b7496a4929e713080f07ekm // Analytic solution for optimal gains. See paper for derivation. 343dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t n = start_freq - 1; n < bank_size_; ++n) { 344030249dd247444687663c4969ff078dc0a4b24acekm float alpha0, beta0, gamma0; 345030249dd247444687663c4969ff078dc0a4b24acekm gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] + 346db4fecfb01ac51e936e4b7496a4929e713080f07ekm lambda * var_x0[n] * var_n0[n] * var_n0[n]; 347030249dd247444687663c4969ff078dc0a4b24acekm beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n]; 348030249dd247444687663c4969ff078dc0a4b24acekm if (quadratic) { 349030249dd247444687663c4969ff078dc0a4b24acekm alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n]; 350db4fecfb01ac51e936e4b7496a4929e713080f07ekm sols[n] = 351db4fecfb01ac51e936e4b7496a4929e713080f07ekm (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0); 352030249dd247444687663c4969ff078dc0a4b24acekm } else { 353030249dd247444687663c4969ff078dc0a4b24acekm sols[n] = -gamma0 / beta0; 354030249dd247444687663c4969ff078dc0a4b24acekm } 355030249dd247444687663c4969ff078dc0a4b24acekm sols[n] = fmax(0, sols[n]); 356030249dd247444687663c4969ff078dc0a4b24acekm } 357030249dd247444687663c4969ff078dc0a4b24acekm} 358030249dd247444687663c4969ff078dc0a4b24acekm 359030249dd247444687663c4969ff078dc0a4b24acekmvoid IntelligibilityEnhancer::FilterVariance(const float* var, float* result) { 36091d6edef35e7275879c30ce16ecb8b6dc73c6e4ahenrikg RTC_DCHECK_GT(freqs_, 0u); 361dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 0; i < bank_size_; ++i) { 3627c5304c79151f092efcbef6680c5da366a930da2Jared Duke result[i] = DotProduct(&filter_bank_[i][0], var, freqs_); 363030249dd247444687663c4969ff078dc0a4b24acekm } 364030249dd247444687663c4969ff078dc0a4b24acekm} 365030249dd247444687663c4969ff078dc0a4b24acekm 366db4fecfb01ac51e936e4b7496a4929e713080f07ekmfloat IntelligibilityEnhancer::DotProduct(const float* a, 367db4fecfb01ac51e936e4b7496a4929e713080f07ekm const float* b, 368dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t length) { 369030249dd247444687663c4969ff078dc0a4b24acekm float ret = 0.0f; 370030249dd247444687663c4969ff078dc0a4b24acekm 371dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting for (size_t i = 0; i < length; ++i) { 372030249dd247444687663c4969ff078dc0a4b24acekm ret = fmaf(a[i], b[i], ret); 373030249dd247444687663c4969ff078dc0a4b24acekm } 374030249dd247444687663c4969ff078dc0a4b24acekm return ret; 375030249dd247444687663c4969ff078dc0a4b24acekm} 376030249dd247444687663c4969ff078dc0a4b24acekm 37760d9b332a5391045439bfb6a3a5447973e3d5603ekmeyersonbool IntelligibilityEnhancer::active() const { 37860d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson return active_; 37960d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson} 38060d9b332a5391045439bfb6a3a5447973e3d5603ekmeyerson 381030249dd247444687663c4969ff078dc0a4b24acekm} // namespace webrtc 382