1/* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "voice_detection_impl.h" 12 13#include <cassert> 14 15#include "critical_section_wrapper.h" 16#include "webrtc_vad.h" 17 18#include "audio_processing_impl.h" 19#include "audio_buffer.h" 20 21namespace webrtc { 22 23typedef VadInst Handle; 24 25namespace { 26WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) { 27 switch (likelihood) { 28 case VoiceDetection::kVeryLowLikelihood: 29 return 3; 30 break; 31 case VoiceDetection::kLowLikelihood: 32 return 2; 33 break; 34 case VoiceDetection::kModerateLikelihood: 35 return 1; 36 break; 37 case VoiceDetection::kHighLikelihood: 38 return 0; 39 break; 40 default: 41 return -1; 42 } 43} 44} // namespace 45 46 47VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm) 48 : ProcessingComponent(apm), 49 apm_(apm), 50 stream_has_voice_(false), 51 using_external_vad_(false), 52 likelihood_(kLowLikelihood), 53 frame_size_ms_(10), 54 frame_size_samples_(0) {} 55 56VoiceDetectionImpl::~VoiceDetectionImpl() {} 57 58int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 59 if (!is_component_enabled()) { 60 return apm_->kNoError; 61 } 62 63 if (using_external_vad_) { 64 using_external_vad_ = false; 65 return apm_->kNoError; 66 } 67 assert(audio->samples_per_split_channel() <= 160); 68 69 WebRtc_Word16* mixed_data = audio->low_pass_split_data(0); 70 if (audio->num_channels() > 1) { 71 audio->CopyAndMixLowPass(1); 72 mixed_data = audio->mixed_low_pass_data(0); 73 } 74 75 // TODO(ajm): concatenate data in frame buffer here. 76 77 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), 78 apm_->split_sample_rate_hz(), 79 mixed_data, 80 frame_size_samples_); 81 if (vad_ret == 0) { 82 stream_has_voice_ = false; 83 audio->set_activity(AudioFrame::kVadPassive); 84 } else if (vad_ret == 1) { 85 stream_has_voice_ = true; 86 audio->set_activity(AudioFrame::kVadActive); 87 } else { 88 return apm_->kUnspecifiedError; 89 } 90 91 return apm_->kNoError; 92} 93 94int VoiceDetectionImpl::Enable(bool enable) { 95 CriticalSectionScoped crit_scoped(*apm_->crit()); 96 return EnableComponent(enable); 97} 98 99bool VoiceDetectionImpl::is_enabled() const { 100 return is_component_enabled(); 101} 102 103int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 104 using_external_vad_ = true; 105 stream_has_voice_ = has_voice; 106 return apm_->kNoError; 107} 108 109bool VoiceDetectionImpl::stream_has_voice() const { 110 // TODO(ajm): enable this assertion? 111 //assert(using_external_vad_ || is_component_enabled()); 112 return stream_has_voice_; 113} 114 115int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 116 CriticalSectionScoped crit_scoped(*apm_->crit()); 117 if (MapSetting(likelihood) == -1) { 118 return apm_->kBadParameterError; 119 } 120 121 likelihood_ = likelihood; 122 return Configure(); 123} 124 125VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 126 return likelihood_; 127} 128 129int VoiceDetectionImpl::set_frame_size_ms(int size) { 130 CriticalSectionScoped crit_scoped(*apm_->crit()); 131 assert(size == 10); // TODO(ajm): remove when supported. 132 if (size != 10 && 133 size != 20 && 134 size != 30) { 135 return apm_->kBadParameterError; 136 } 137 138 frame_size_ms_ = size; 139 140 return Initialize(); 141} 142 143int VoiceDetectionImpl::frame_size_ms() const { 144 return frame_size_ms_; 145} 146 147int VoiceDetectionImpl::Initialize() { 148 int err = ProcessingComponent::Initialize(); 149 if (err != apm_->kNoError || !is_component_enabled()) { 150 return err; 151 } 152 153 using_external_vad_ = false; 154 frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000); 155 // TODO(ajm): intialize frame buffer here. 156 157 return apm_->kNoError; 158} 159 160int VoiceDetectionImpl::get_version(char* version, 161 int version_len_bytes) const { 162 if (WebRtcVad_get_version(version, version_len_bytes) != 0) { 163 return apm_->kBadParameterError; 164 } 165 166 return apm_->kNoError; 167} 168 169void* VoiceDetectionImpl::CreateHandle() const { 170 Handle* handle = NULL; 171 if (WebRtcVad_Create(&handle) != apm_->kNoError) { 172 handle = NULL; 173 } else { 174 assert(handle != NULL); 175 } 176 177 return handle; 178} 179 180int VoiceDetectionImpl::DestroyHandle(void* handle) const { 181 return WebRtcVad_Free(static_cast<Handle*>(handle)); 182} 183 184int VoiceDetectionImpl::InitializeHandle(void* handle) const { 185 return WebRtcVad_Init(static_cast<Handle*>(handle)); 186} 187 188int VoiceDetectionImpl::ConfigureHandle(void* handle) const { 189 return WebRtcVad_set_mode(static_cast<Handle*>(handle), 190 MapSetting(likelihood_)); 191} 192 193int VoiceDetectionImpl::num_handles_required() const { 194 return 1; 195} 196 197int VoiceDetectionImpl::GetHandleError(void* handle) const { 198 // The VAD has no get_error() function. 199 assert(handle != NULL); 200 return apm_->kUnspecifiedError; 201} 202} // namespace webrtc 203