1e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/* 2e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 4e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Use of this source code is governed by a BSD-style license 5e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * that can be found in the LICENSE file in the root of the source 6e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * tree. An additional intellectual property rights grant can be found 7e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * in the file PATENTS. All contributing project authors may 8e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * be found in the AUTHORS file in the root of the source tree. 9e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */ 10e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 11e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "voice_detection_impl.h" 12e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 13e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include <cassert> 14e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 15e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "critical_section_wrapper.h" 16e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "webrtc_vad.h" 17e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 18e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "audio_processing_impl.h" 19e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "audio_buffer.h" 20e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 21e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentnamespace webrtc { 22e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 23e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurenttypedef VadInst Handle; 24e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 25e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentnamespace { 26e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) { 27e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent switch (likelihood) { 28e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent case VoiceDetection::kVeryLowLikelihood: 29e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return 3; 30e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent break; 31e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent case VoiceDetection::kLowLikelihood: 32e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return 2; 33e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent break; 34e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent case VoiceDetection::kModerateLikelihood: 35e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return 1; 36e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent break; 37e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent case VoiceDetection::kHighLikelihood: 38e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return 0; 39e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent break; 40e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent default: 41e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return -1; 42e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 43e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 44e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} // namespace 45e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 46e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 47e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentVoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm) 48e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent : ProcessingComponent(apm), 49e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent apm_(apm), 50e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent stream_has_voice_(false), 51e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent using_external_vad_(false), 52e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent likelihood_(kLowLikelihood), 53e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent frame_size_ms_(10), 54e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent frame_size_samples_(0) {} 55e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 56e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentVoiceDetectionImpl::~VoiceDetectionImpl() {} 57e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 58e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 59e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent if (!is_component_enabled()) { 60e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kNoError; 61e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 62e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 63e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent if (using_external_vad_) { 64e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent using_external_vad_ = false; 65e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kNoError; 66e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 67e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent assert(audio->samples_per_split_channel() <= 160); 68e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 69e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16* mixed_data = audio->low_pass_split_data(0); 70e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent if (audio->num_channels() > 1) { 71e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent audio->CopyAndMixLowPass(1); 72e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent mixed_data = audio->mixed_low_pass_data(0); 73e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 74e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 75e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent // TODO(ajm): concatenate data in frame buffer here. 76e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 77c55a96383497a772a307b346368133960b02ad03Eric Laurent int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), 78c55a96383497a772a307b346368133960b02ad03Eric Laurent apm_->split_sample_rate_hz(), 79c55a96383497a772a307b346368133960b02ad03Eric Laurent mixed_data, 80c55a96383497a772a307b346368133960b02ad03Eric Laurent frame_size_samples_); 81c55a96383497a772a307b346368133960b02ad03Eric Laurent if (vad_ret == 0) { 82e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent stream_has_voice_ = false; 83c55a96383497a772a307b346368133960b02ad03Eric Laurent audio->set_activity(AudioFrame::kVadPassive); 84c55a96383497a772a307b346368133960b02ad03Eric Laurent } else if (vad_ret == 1) { 85e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent stream_has_voice_ = true; 86c55a96383497a772a307b346368133960b02ad03Eric Laurent audio->set_activity(AudioFrame::kVadActive); 87e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } else { 88e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kUnspecifiedError; 89e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 90e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 91e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kNoError; 92e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 93e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 94e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::Enable(bool enable) { 95e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent CriticalSectionScoped crit_scoped(*apm_->crit()); 96e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return EnableComponent(enable); 97e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 98e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 99e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentbool VoiceDetectionImpl::is_enabled() const { 100e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return is_component_enabled(); 101e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 102e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 103e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 104e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent using_external_vad_ = true; 105e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent stream_has_voice_ = has_voice; 106e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kNoError; 107e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 108e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 109e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentbool VoiceDetectionImpl::stream_has_voice() const { 110e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent // TODO(ajm): enable this assertion? 111e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent //assert(using_external_vad_ || is_component_enabled()); 112e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return stream_has_voice_; 113e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 114e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 115e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 116e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent CriticalSectionScoped crit_scoped(*apm_->crit()); 117e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent if (MapSetting(likelihood) == -1) { 118e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kBadParameterError; 119e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 120e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 121e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent likelihood_ = likelihood; 122e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return Configure(); 123e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 124e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 125e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentVoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 126e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return likelihood_; 127e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 128e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 129e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::set_frame_size_ms(int size) { 130e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent CriticalSectionScoped crit_scoped(*apm_->crit()); 131e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent assert(size == 10); // TODO(ajm): remove when supported. 132e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent if (size != 10 && 133e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent size != 20 && 134e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent size != 30) { 135e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kBadParameterError; 136e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 137e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 138e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent frame_size_ms_ = size; 139e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 140e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return Initialize(); 141e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 142e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 143e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::frame_size_ms() const { 144e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return frame_size_ms_; 145e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 146e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 147e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::Initialize() { 148e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent int err = ProcessingComponent::Initialize(); 149e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent if (err != apm_->kNoError || !is_component_enabled()) { 150e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return err; 151e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 152e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 153e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent using_external_vad_ = false; 154e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000); 155e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent // TODO(ajm): intialize frame buffer here. 156e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 157e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kNoError; 158e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 159e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 160e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::get_version(char* version, 161e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent int version_len_bytes) const { 162e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent if (WebRtcVad_get_version(version, version_len_bytes) != 0) { 163e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kBadParameterError; 164e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 165e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 166e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kNoError; 167e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 168e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 169e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentvoid* VoiceDetectionImpl::CreateHandle() const { 170e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent Handle* handle = NULL; 171e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent if (WebRtcVad_Create(&handle) != apm_->kNoError) { 172e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent handle = NULL; 173e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } else { 174e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent assert(handle != NULL); 175e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent } 176e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 177e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return handle; 178e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 179e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 180e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::DestroyHandle(void* handle) const { 181e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return WebRtcVad_Free(static_cast<Handle*>(handle)); 182e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 183e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 184e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::InitializeHandle(void* handle) const { 185e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return WebRtcVad_Init(static_cast<Handle*>(handle)); 186e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 187e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 188e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::ConfigureHandle(void* handle) const { 189e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return WebRtcVad_set_mode(static_cast<Handle*>(handle), 190e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent MapSetting(likelihood_)); 191e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 192e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 193e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::num_handles_required() const { 194e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return 1; 195e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 196e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 197e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::GetHandleError(void* handle) const { 198e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent // The VAD has no get_error() function. 199e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent assert(handle != NULL); 200e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent return apm_->kUnspecifiedError; 201e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} 202e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} // namespace webrtc 203