1/* 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" 12 13#include <algorithm> 14 15#include "webrtc/base/checks.h" 16 17namespace webrtc { 18namespace { 19 20const size_t kMaxLength = 320; 21const size_t kNumChannels = 1; 22 23const double kDefaultVoiceValue = 1.0; 24const double kNeutralProbability = 0.5; 25const double kLowProbability = 0.01; 26 27} // namespace 28 29VoiceActivityDetector::VoiceActivityDetector() 30 : last_voice_probability_(kDefaultVoiceValue), 31 standalone_vad_(StandaloneVad::Create()) { 32} 33 34// Because ISAC has a different chunk length, it updates 35// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data. 36// Otherwise it clears them. 37void VoiceActivityDetector::ProcessChunk(const int16_t* audio, 38 size_t length, 39 int sample_rate_hz) { 40 RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100); 41 RTC_DCHECK_LE(length, kMaxLength); 42 // Resample to the required rate. 43 const int16_t* resampled_ptr = audio; 44 if (sample_rate_hz != kSampleRateHz) { 45 RTC_CHECK_EQ( 46 resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), 47 0); 48 resampler_.Push(audio, length, resampled_, kLength10Ms, length); 49 resampled_ptr = resampled_; 50 } 51 RTC_DCHECK_EQ(length, kLength10Ms); 52 53 // Each chunk needs to be passed into |standalone_vad_|, because internally it 54 // buffers the audio and processes it all at once when GetActivity() is 55 // called. 56 RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); 57 58 audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); 59 60 chunkwise_voice_probabilities_.resize(features_.num_frames); 61 chunkwise_rms_.resize(features_.num_frames); 62 std::copy(features_.rms, features_.rms + chunkwise_rms_.size(), 63 chunkwise_rms_.begin()); 64 if (features_.num_frames > 0) { 65 if (features_.silence) { 66 // The other features are invalid, so set the voice probabilities to an 67 // arbitrary low value. 68 std::fill(chunkwise_voice_probabilities_.begin(), 69 chunkwise_voice_probabilities_.end(), kLowProbability); 70 } else { 71 std::fill(chunkwise_voice_probabilities_.begin(), 72 chunkwise_voice_probabilities_.end(), kNeutralProbability); 73 RTC_CHECK_GE( 74 standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], 75 chunkwise_voice_probabilities_.size()), 76 0); 77 RTC_CHECK_GE(pitch_based_vad_.VoicingProbability( 78 features_, &chunkwise_voice_probabilities_[0]), 79 0); 80 } 81 last_voice_probability_ = chunkwise_voice_probabilities_.back(); 82 } 83} 84 85} // namespace webrtc 86