1/*
2 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "voice_detection_impl.h"
12
13#include <cassert>
14
15#include "critical_section_wrapper.h"
16#include "webrtc_vad.h"
17
18#include "audio_processing_impl.h"
19#include "audio_buffer.h"
20
21namespace webrtc {
22
// Concrete VAD instance type. The void* handles that this component receives
// in its *Handle() hooks are static_cast to Handle* before being passed to the
// WebRtcVad_* functions.
typedef VadInst Handle;
24
25namespace {
26WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
27  switch (likelihood) {
28    case VoiceDetection::kVeryLowLikelihood:
29      return 3;
30      break;
31    case VoiceDetection::kLowLikelihood:
32      return 2;
33      break;
34    case VoiceDetection::kModerateLikelihood:
35      return 1;
36      break;
37    case VoiceDetection::kHighLikelihood:
38      return 0;
39      break;
40    default:
41      return -1;
42  }
43}
44}  // namespace
45
46
47VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
48  : ProcessingComponent(apm),
49    apm_(apm),
50    stream_has_voice_(false),
51    using_external_vad_(false),
52    likelihood_(kLowLikelihood),
53    frame_size_ms_(10),
54    frame_size_samples_(0) {}
55
56VoiceDetectionImpl::~VoiceDetectionImpl() {}
57
58int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
59  if (!is_component_enabled()) {
60    return apm_->kNoError;
61  }
62
63  if (using_external_vad_) {
64    using_external_vad_ = false;
65    return apm_->kNoError;
66  }
67  assert(audio->samples_per_split_channel() <= 160);
68
69  WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
70  if (audio->num_channels() > 1) {
71    audio->CopyAndMixLowPass(1);
72    mixed_data = audio->mixed_low_pass_data(0);
73  }
74
75  // TODO(ajm): concatenate data in frame buffer here.
76
77  int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
78                                  apm_->split_sample_rate_hz(),
79                                  mixed_data,
80                                  frame_size_samples_);
81  if (vad_ret == 0) {
82    stream_has_voice_ = false;
83    audio->set_activity(AudioFrame::kVadPassive);
84  } else if (vad_ret == 1) {
85    stream_has_voice_ = true;
86    audio->set_activity(AudioFrame::kVadActive);
87  } else {
88    return apm_->kUnspecifiedError;
89  }
90
91  return apm_->kNoError;
92}
93
94int VoiceDetectionImpl::Enable(bool enable) {
95  CriticalSectionScoped crit_scoped(*apm_->crit());
96  return EnableComponent(enable);
97}
98
99bool VoiceDetectionImpl::is_enabled() const {
100  return is_component_enabled();
101}
102
103int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
104  using_external_vad_ = true;
105  stream_has_voice_ = has_voice;
106  return apm_->kNoError;
107}
108
109bool VoiceDetectionImpl::stream_has_voice() const {
110  // TODO(ajm): enable this assertion?
111  //assert(using_external_vad_ || is_component_enabled());
112  return stream_has_voice_;
113}
114
115int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
116  CriticalSectionScoped crit_scoped(*apm_->crit());
117  if (MapSetting(likelihood) == -1) {
118    return apm_->kBadParameterError;
119  }
120
121  likelihood_ = likelihood;
122  return Configure();
123}
124
125VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
126  return likelihood_;
127}
128
129int VoiceDetectionImpl::set_frame_size_ms(int size) {
130  CriticalSectionScoped crit_scoped(*apm_->crit());
131  assert(size == 10); // TODO(ajm): remove when supported.
132  if (size != 10 &&
133      size != 20 &&
134      size != 30) {
135    return apm_->kBadParameterError;
136  }
137
138  frame_size_ms_ = size;
139
140  return Initialize();
141}
142
143int VoiceDetectionImpl::frame_size_ms() const {
144  return frame_size_ms_;
145}
146
147int VoiceDetectionImpl::Initialize() {
148  int err = ProcessingComponent::Initialize();
149  if (err != apm_->kNoError || !is_component_enabled()) {
150    return err;
151  }
152
153  using_external_vad_ = false;
154  frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
155  // TODO(ajm): intialize frame buffer here.
156
157  return apm_->kNoError;
158}
159
160int VoiceDetectionImpl::get_version(char* version,
161                                    int version_len_bytes) const {
162  if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
163    return apm_->kBadParameterError;
164  }
165
166  return apm_->kNoError;
167}
168
169void* VoiceDetectionImpl::CreateHandle() const {
170  Handle* handle = NULL;
171  if (WebRtcVad_Create(&handle) != apm_->kNoError) {
172    handle = NULL;
173  } else {
174    assert(handle != NULL);
175  }
176
177  return handle;
178}
179
180int VoiceDetectionImpl::DestroyHandle(void* handle) const {
181  return WebRtcVad_Free(static_cast<Handle*>(handle));
182}
183
184int VoiceDetectionImpl::InitializeHandle(void* handle) const {
185  return WebRtcVad_Init(static_cast<Handle*>(handle));
186}
187
188int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
189  return WebRtcVad_set_mode(static_cast<Handle*>(handle),
190                            MapSetting(likelihood_));
191}
192
193int VoiceDetectionImpl::num_handles_required() const {
194  return 1;
195}
196
197int VoiceDetectionImpl::GetHandleError(void* handle) const {
198  // The VAD has no get_error() function.
199  assert(handle != NULL);
200  return apm_->kUnspecifiedError;
201}
202}  // namespace webrtc
203