1b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org/* 2b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * 4b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Use of this source code is governed by a BSD-style license 5b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * that can be found in the LICENSE file in the root of the source 6b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * tree. An additional intellectual property rights grant can be found 7b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * in the file PATENTS. All contributing project authors may 8b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * be found in the AUTHORS file in the root of the source tree. 9b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org */ 10b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 119fb16139d917ba32720e031d3c871987d418668fpbos@webrtc.org#include "webrtc/modules/audio_processing/voice_detection_impl.h" 12b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 133f45c2e0ac4cb280f941efa3a3476895795e3dd6pbos@webrtc.org#include <assert.h> 14b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 159fb16139d917ba32720e031d3c871987d418668fpbos@webrtc.org#include "webrtc/common_audio/vad/include/webrtc_vad.h" 169fb16139d917ba32720e031d3c871987d418668fpbos@webrtc.org#include "webrtc/modules/audio_processing/audio_buffer.h" 17a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org#include "webrtc/system_wrappers/interface/critical_section_wrapper.h" 18b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 19b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgnamespace webrtc { 20b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 21b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgtypedef VadInst Handle; 22b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 23b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgnamespace { 24b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint MapSetting(VoiceDetection::Likelihood likelihood) { 25b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org switch (likelihood) { 26b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org case VoiceDetection::kVeryLowLikelihood: 27b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return 3; 28b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org case VoiceDetection::kLowLikelihood: 29b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return 2; 30b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org case VoiceDetection::kModerateLikelihood: 31b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return 1; 32b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org case VoiceDetection::kHighLikelihood: 33b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return 0; 34b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 35b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(false); 36b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return -1; 37b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 38b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} // namespace 39b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 40a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.orgVoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, 41a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org CriticalSectionWrapper* crit) 42a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org : ProcessingComponent(), 43b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org apm_(apm), 44a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org crit_(crit), 45b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org stream_has_voice_(false), 46b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org using_external_vad_(false), 47b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org likelihood_(kLowLikelihood), 48b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org frame_size_ms_(10), 49b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org frame_size_samples_(0) {} 50b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 51b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgVoiceDetectionImpl::~VoiceDetectionImpl() {} 52b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 53b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 54b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (!is_component_enabled()) { 55b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kNoError; 56b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 57b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 58b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (using_external_vad_) { 59b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org using_external_vad_ = false; 60b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kNoError; 61b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 62b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(audio->samples_per_split_channel() <= 160); 63b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 64b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // TODO(ajm): concatenate data in frame buffer here. 65b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 66b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), 67467f7567c8befea153861e09f048f69932d6e3bdandrew@webrtc.org apm_->proc_split_sample_rate_hz(), 68eb15100c9bdb4c97ffda2c05a934aab270795c27aluebs@webrtc.org audio->mixed_low_pass_data(), 69b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org frame_size_samples_); 70b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (vad_ret == 0) { 71b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org stream_has_voice_ = false; 72b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org audio->set_activity(AudioFrame::kVadPassive); 73b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } else if (vad_ret == 1) { 74b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org stream_has_voice_ = true; 75b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org audio->set_activity(AudioFrame::kVadActive); 76b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } else { 77b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kUnspecifiedError; 78b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 79b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 80b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kNoError; 81b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 82b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 83b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::Enable(bool enable) { 84a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org CriticalSectionScoped crit_scoped(crit_); 85b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return EnableComponent(enable); 86b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 87b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 88b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgbool VoiceDetectionImpl::is_enabled() const { 89b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return is_component_enabled(); 90b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 91b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 92b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 93b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org using_external_vad_ = true; 94b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org stream_has_voice_ = has_voice; 95b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kNoError; 96b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 97b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 98b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgbool VoiceDetectionImpl::stream_has_voice() const { 99b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // TODO(ajm): enable this assertion? 100b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org //assert(using_external_vad_ || is_component_enabled()); 101b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return stream_has_voice_; 102b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 103b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 104b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 105a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org CriticalSectionScoped crit_scoped(crit_); 106b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (MapSetting(likelihood) == -1) { 107b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kBadParameterError; 108b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 109b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 110b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org likelihood_ = likelihood; 111b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return Configure(); 112b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 113b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 114b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgVoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 115b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return likelihood_; 116b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 117b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 118b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::set_frame_size_ms(int size) { 119a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org CriticalSectionScoped crit_scoped(crit_); 120b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(size == 10); // TODO(ajm): remove when supported. 121b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (size != 10 && 122b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org size != 20 && 123b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org size != 30) { 124b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kBadParameterError; 125b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 126b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 127b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org frame_size_ms_ = size; 128b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 129b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return Initialize(); 130b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 131b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 132b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::frame_size_ms() const { 133b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return frame_size_ms_; 134b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 135b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 136b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::Initialize() { 137b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org int err = ProcessingComponent::Initialize(); 138b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (err != apm_->kNoError || !is_component_enabled()) { 139b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return err; 140b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 141b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 142b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org using_external_vad_ = false; 143467f7567c8befea153861e09f048f69932d6e3bdandrew@webrtc.org frame_size_samples_ = frame_size_ms_ * 144467f7567c8befea153861e09f048f69932d6e3bdandrew@webrtc.org apm_->proc_split_sample_rate_hz() / 1000; 145b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // TODO(ajm): intialize frame buffer here. 146b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 147b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kNoError; 148b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 149b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 150b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgvoid* VoiceDetectionImpl::CreateHandle() const { 151b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org Handle* handle = NULL; 152b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org if (WebRtcVad_Create(&handle) != apm_->kNoError) { 153b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org handle = NULL; 154b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } else { 155b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(handle != NULL); 156b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org } 157b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 158b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return handle; 159b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 160b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 161ffc2de0133683fb103eef61f21134f469bc099dbbjornv@webrtc.orgvoid VoiceDetectionImpl::DestroyHandle(void* handle) const { 162642e80e5fed3b05fdada3dd8b5dbdfabbccaa5cebjornv@webrtc.org WebRtcVad_Free(static_cast<Handle*>(handle)); 163b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 164b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 165b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::InitializeHandle(void* handle) const { 166b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return WebRtcVad_Init(static_cast<Handle*>(handle)); 167b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 168b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 169b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::ConfigureHandle(void* handle) const { 170b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return WebRtcVad_set_mode(static_cast<Handle*>(handle), 171b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org MapSetting(likelihood_)); 172b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 173b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 174b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::num_handles_required() const { 175b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return 1; 176b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 177b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org 178b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::GetHandleError(void* handle) const { 179b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org // The VAD has no get_error() function. 180b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org assert(handle != NULL); 181b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org return apm_->kUnspecifiedError; 182b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} 183b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} // namespace webrtc 184