12a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/*
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  Use of this source code is governed by a BSD-style license
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  that can be found in the LICENSE file in the root of the source
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  tree. An additional intellectual property rights grant can be found
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  in the file PATENTS.  All contributing project authors may
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  be found in the AUTHORS file in the root of the source tree.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "voice_detection_impl.h"
12116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <cassert>
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "critical_section_wrapper.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "webrtc_vad.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "audio_processing_impl.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "audio_buffer.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace webrtc {
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Alias for the VAD instance type. The void* handles passed to the
// *Handle() methods in this file are cast to Handle* before being handed to
// the WebRtcVad_* functions.
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef VadInst Handle;
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (likelihood) {
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case VoiceDetection::kVeryLowLikelihood:
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return 3;
30116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      break;
31116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    case VoiceDetection::kLowLikelihood:
32116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      return 2;
33116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      break;
34116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    case VoiceDetection::kModerateLikelihood:
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return 1;
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case VoiceDetection::kHighLikelihood:
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return 0;
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
40    default:
41      return -1;
42  }
43}
44}  // namespace
45
46
47VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
48  : ProcessingComponent(apm),
49    apm_(apm),
50    stream_has_voice_(false),
51    using_external_vad_(false),
52    likelihood_(kLowLikelihood),
53    frame_size_ms_(10),
54    frame_size_samples_(0) {}
55
56VoiceDetectionImpl::~VoiceDetectionImpl() {}
57
58int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
59  if (!is_component_enabled()) {
60    return apm_->kNoError;
61  }
62
63  if (using_external_vad_) {
64    using_external_vad_ = false;
65    return apm_->kNoError;
66  }
67  assert(audio->samples_per_split_channel() <= 160);
68
69  WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
70  if (audio->num_channels() > 1) {
71    audio->CopyAndMixLowPass(1);
72    mixed_data = audio->mixed_low_pass_data(0);
73  }
74
75  // TODO(ajm): concatenate data in frame buffer here.
76
77  int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
78                                  apm_->split_sample_rate_hz(),
79                                  mixed_data,
80                                  frame_size_samples_);
81  if (vad_ret == 0) {
82    stream_has_voice_ = false;
83    audio->set_activity(AudioFrame::kVadPassive);
84  } else if (vad_ret == 1) {
85    stream_has_voice_ = true;
86    audio->set_activity(AudioFrame::kVadActive);
87  } else {
88    return apm_->kUnspecifiedError;
89  }
90
91  return apm_->kNoError;
92}
93
94int VoiceDetectionImpl::Enable(bool enable) {
95  CriticalSectionScoped crit_scoped(*apm_->crit());
96  return EnableComponent(enable);
97}
98
99bool VoiceDetectionImpl::is_enabled() const {
100  return is_component_enabled();
101}
102
103int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
104  using_external_vad_ = true;
105  stream_has_voice_ = has_voice;
106  return apm_->kNoError;
107}
108
109bool VoiceDetectionImpl::stream_has_voice() const {
110  // TODO(ajm): enable this assertion?
111  //assert(using_external_vad_ || is_component_enabled());
112  return stream_has_voice_;
113}
114
115int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
116  CriticalSectionScoped crit_scoped(*apm_->crit());
117  if (MapSetting(likelihood) == -1) {
118    return apm_->kBadParameterError;
119  }
120
121  likelihood_ = likelihood;
122  return Configure();
123}
124
125VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
126  return likelihood_;
127}
128
129int VoiceDetectionImpl::set_frame_size_ms(int size) {
130  CriticalSectionScoped crit_scoped(*apm_->crit());
131  assert(size == 10); // TODO(ajm): remove when supported.
132  if (size != 10 &&
133      size != 20 &&
134      size != 30) {
135    return apm_->kBadParameterError;
136  }
137
138  frame_size_ms_ = size;
139
140  return Initialize();
141}
142
143int VoiceDetectionImpl::frame_size_ms() const {
144  return frame_size_ms_;
145}
146
147int VoiceDetectionImpl::Initialize() {
148  int err = ProcessingComponent::Initialize();
149  if (err != apm_->kNoError || !is_component_enabled()) {
150    return err;
151  }
152
153  using_external_vad_ = false;
154  frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
155  // TODO(ajm): intialize frame buffer here.
156
157  return apm_->kNoError;
158}
159
160int VoiceDetectionImpl::get_version(char* version,
161                                    int version_len_bytes) const {
162  if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
163    return apm_->kBadParameterError;
164  }
165
166  return apm_->kNoError;
167}
168
169void* VoiceDetectionImpl::CreateHandle() const {
170  Handle* handle = NULL;
171  if (WebRtcVad_Create(&handle) != apm_->kNoError) {
172    handle = NULL;
173  } else {
174    assert(handle != NULL);
175  }
176
177  return handle;
178}
179
180int VoiceDetectionImpl::DestroyHandle(void* handle) const {
181  return WebRtcVad_Free(static_cast<Handle*>(handle));
182}
183
184int VoiceDetectionImpl::InitializeHandle(void* handle) const {
185  return WebRtcVad_Init(static_cast<Handle*>(handle));
186}
187
188int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
189  return WebRtcVad_set_mode(static_cast<Handle*>(handle),
190                            MapSetting(likelihood_));
191}
192
193int VoiceDetectionImpl::num_handles_required() const {
194  return 1;
195}
196
197int VoiceDetectionImpl::GetHandleError(void* handle) const {
198  // The VAD has no get_error() function.
199  assert(handle != NULL);
200  return apm_->kUnspecifiedError;
201}
202}  // namespace webrtc
203