/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
119fb16139d917ba32720e031d3c871987d418668fpbos@webrtc.org#include "webrtc/modules/audio_processing/voice_detection_impl.h"
12b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
133f45c2e0ac4cb280f941efa3a3476895795e3dd6pbos@webrtc.org#include <assert.h>
14b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
159fb16139d917ba32720e031d3c871987d418668fpbos@webrtc.org#include "webrtc/common_audio/vad/include/webrtc_vad.h"
169fb16139d917ba32720e031d3c871987d418668fpbos@webrtc.org#include "webrtc/modules/audio_processing/audio_buffer.h"
17a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
18b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
19b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgnamespace webrtc {
20b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
21b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgtypedef VadInst Handle;
22b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
23b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgnamespace {
24b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint MapSetting(VoiceDetection::Likelihood likelihood) {
25b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  switch (likelihood) {
26b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    case VoiceDetection::kVeryLowLikelihood:
27b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      return 3;
28b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    case VoiceDetection::kLowLikelihood:
29b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      return 2;
30b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    case VoiceDetection::kModerateLikelihood:
31b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      return 1;
32b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    case VoiceDetection::kHighLikelihood:
33b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      return 0;
34b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
35b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  assert(false);
36b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return -1;
37b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
38b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}  // namespace
39b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
40a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.orgVoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
41a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org                                       CriticalSectionWrapper* crit)
42a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org  : ProcessingComponent(),
43b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    apm_(apm),
44a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org    crit_(crit),
45b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    stream_has_voice_(false),
46b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    using_external_vad_(false),
47b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    likelihood_(kLowLikelihood),
48b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    frame_size_ms_(10),
49b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    frame_size_samples_(0) {}
50b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
51b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgVoiceDetectionImpl::~VoiceDetectionImpl() {}
52b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
53b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
54b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (!is_component_enabled()) {
55b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    return apm_->kNoError;
56b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
57b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
58b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (using_external_vad_) {
59b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    using_external_vad_ = false;
60b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    return apm_->kNoError;
61b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
62b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  assert(audio->samples_per_split_channel() <= 160);
63b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
64b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // TODO(ajm): concatenate data in frame buffer here.
65b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
66b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
67467f7567c8befea153861e09f048f69932d6e3bdandrew@webrtc.org                                  apm_->proc_split_sample_rate_hz(),
68eb15100c9bdb4c97ffda2c05a934aab270795c27aluebs@webrtc.org                                  audio->mixed_low_pass_data(),
69b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                                  frame_size_samples_);
70b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (vad_ret == 0) {
71b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    stream_has_voice_ = false;
72b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    audio->set_activity(AudioFrame::kVadPassive);
73b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  } else if (vad_ret == 1) {
74b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    stream_has_voice_ = true;
75b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    audio->set_activity(AudioFrame::kVadActive);
76b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  } else {
77b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    return apm_->kUnspecifiedError;
78b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
79b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
80b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return apm_->kNoError;
81b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
82b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
83b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::Enable(bool enable) {
84a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org  CriticalSectionScoped crit_scoped(crit_);
85b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return EnableComponent(enable);
86b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
87b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
88b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgbool VoiceDetectionImpl::is_enabled() const {
89b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return is_component_enabled();
90b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
91b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
92b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
93b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  using_external_vad_ = true;
94b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  stream_has_voice_ = has_voice;
95b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return apm_->kNoError;
96b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
97b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
98b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgbool VoiceDetectionImpl::stream_has_voice() const {
99b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // TODO(ajm): enable this assertion?
100b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  //assert(using_external_vad_ || is_component_enabled());
101b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return stream_has_voice_;
102b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
103b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
104b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
105a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org  CriticalSectionScoped crit_scoped(crit_);
106b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (MapSetting(likelihood) == -1) {
107b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    return apm_->kBadParameterError;
108b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
109b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
110b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  likelihood_ = likelihood;
111b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return Configure();
112b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
113b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
114b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgVoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
115b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return likelihood_;
116b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
117b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
118b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::set_frame_size_ms(int size) {
119a1a60018a1f1ec863451ad0ed4eae58239882920andrew@webrtc.org  CriticalSectionScoped crit_scoped(crit_);
120b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  assert(size == 10); // TODO(ajm): remove when supported.
121b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (size != 10 &&
122b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      size != 20 &&
123b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org      size != 30) {
124b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    return apm_->kBadParameterError;
125b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
126b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
127b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  frame_size_ms_ = size;
128b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
129b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return Initialize();
130b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
131b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
132b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::frame_size_ms() const {
133b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return frame_size_ms_;
134b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
135b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
136b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::Initialize() {
137b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  int err = ProcessingComponent::Initialize();
138b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (err != apm_->kNoError || !is_component_enabled()) {
139b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    return err;
140b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
141b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
142b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  using_external_vad_ = false;
143467f7567c8befea153861e09f048f69932d6e3bdandrew@webrtc.org  frame_size_samples_ = frame_size_ms_ *
144467f7567c8befea153861e09f048f69932d6e3bdandrew@webrtc.org      apm_->proc_split_sample_rate_hz() / 1000;
145b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // TODO(ajm): intialize frame buffer here.
146b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
147b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return apm_->kNoError;
148b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
149b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
150b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgvoid* VoiceDetectionImpl::CreateHandle() const {
151b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  Handle* handle = NULL;
152b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  if (WebRtcVad_Create(&handle) != apm_->kNoError) {
153b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    handle = NULL;
154b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  } else {
155b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    assert(handle != NULL);
156b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  }
157b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
158b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return handle;
159b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
160b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
161ffc2de0133683fb103eef61f21134f469bc099dbbjornv@webrtc.orgvoid VoiceDetectionImpl::DestroyHandle(void* handle) const {
162642e80e5fed3b05fdada3dd8b5dbdfabbccaa5cebjornv@webrtc.org  WebRtcVad_Free(static_cast<Handle*>(handle));
163b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
164b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
165b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::InitializeHandle(void* handle) const {
166b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return WebRtcVad_Init(static_cast<Handle*>(handle));
167b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
168b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
169b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::ConfigureHandle(void* handle) const {
170b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return WebRtcVad_set_mode(static_cast<Handle*>(handle),
171b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                            MapSetting(likelihood_));
172b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
173b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
174b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::num_handles_required() const {
175b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return 1;
176b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
177b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
178b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint VoiceDetectionImpl::GetHandleError(void* handle) const {
179b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  // The VAD has no get_error() function.
180b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  assert(handle != NULL);
181b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org  return apm_->kUnspecifiedError;
182b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}
183b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org}  // namespace webrtc
184