1e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/*
2e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
4e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  Use of this source code is governed by a BSD-style license
5e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  that can be found in the LICENSE file in the root of the source
6e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  tree. An additional intellectual property rights grant can be found
7e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  in the file PATENTS.  All contributing project authors may
8e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *  be found in the AUTHORS file in the root of the source tree.
9e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */
10e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
11e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "voice_detection_impl.h"
12e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
13e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include <cassert>
14e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
15e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "critical_section_wrapper.h"
16e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "webrtc_vad.h"
17e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
18e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "audio_processing_impl.h"
19e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "audio_buffer.h"
20e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
21e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentnamespace webrtc {
22e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
23e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurenttypedef VadInst Handle;
24e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
25e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentnamespace {
26e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
27e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  switch (likelihood) {
28e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    case VoiceDetection::kVeryLowLikelihood:
29e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      return 3;
30e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      break;
31e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    case VoiceDetection::kLowLikelihood:
32e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      return 2;
33e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      break;
34e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    case VoiceDetection::kModerateLikelihood:
35e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      return 1;
36e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      break;
37e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    case VoiceDetection::kHighLikelihood:
38e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      return 0;
39e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      break;
40e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    default:
41e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      return -1;
42e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
43e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
44e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}  // namespace
45e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
46e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
47e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentVoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
48e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  : ProcessingComponent(apm),
49e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    apm_(apm),
50e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    stream_has_voice_(false),
51e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    using_external_vad_(false),
52e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    likelihood_(kLowLikelihood),
53e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    frame_size_ms_(10),
54e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    frame_size_samples_(0) {}
55e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
56e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentVoiceDetectionImpl::~VoiceDetectionImpl() {}
57e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
58e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
59e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  if (!is_component_enabled()) {
60e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    return apm_->kNoError;
61e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
62e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
63e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  if (using_external_vad_) {
64e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    using_external_vad_ = false;
65e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    return apm_->kNoError;
66e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
67e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  assert(audio->samples_per_split_channel() <= 160);
68e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
69e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
70e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  if (audio->num_channels() > 1) {
71e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    audio->CopyAndMixLowPass(1);
72e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    mixed_data = audio->mixed_low_pass_data(0);
73e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
74e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
75e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // TODO(ajm): concatenate data in frame buffer here.
76e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
77c55a96383497a772a307b346368133960b02ad03Eric Laurent  int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
78c55a96383497a772a307b346368133960b02ad03Eric Laurent                                  apm_->split_sample_rate_hz(),
79c55a96383497a772a307b346368133960b02ad03Eric Laurent                                  mixed_data,
80c55a96383497a772a307b346368133960b02ad03Eric Laurent                                  frame_size_samples_);
81c55a96383497a772a307b346368133960b02ad03Eric Laurent  if (vad_ret == 0) {
82e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    stream_has_voice_ = false;
83c55a96383497a772a307b346368133960b02ad03Eric Laurent    audio->set_activity(AudioFrame::kVadPassive);
84c55a96383497a772a307b346368133960b02ad03Eric Laurent  } else if (vad_ret == 1) {
85e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    stream_has_voice_ = true;
86c55a96383497a772a307b346368133960b02ad03Eric Laurent    audio->set_activity(AudioFrame::kVadActive);
87e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  } else {
88e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    return apm_->kUnspecifiedError;
89e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
90e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
91e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return apm_->kNoError;
92e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
93e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
94e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::Enable(bool enable) {
95e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  CriticalSectionScoped crit_scoped(*apm_->crit());
96e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return EnableComponent(enable);
97e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
98e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
99e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentbool VoiceDetectionImpl::is_enabled() const {
100e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return is_component_enabled();
101e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
102e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
103e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
104e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  using_external_vad_ = true;
105e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  stream_has_voice_ = has_voice;
106e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return apm_->kNoError;
107e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
108e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
109e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentbool VoiceDetectionImpl::stream_has_voice() const {
110e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // TODO(ajm): enable this assertion?
111e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  //assert(using_external_vad_ || is_component_enabled());
112e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return stream_has_voice_;
113e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
114e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
115e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
116e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  CriticalSectionScoped crit_scoped(*apm_->crit());
117e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  if (MapSetting(likelihood) == -1) {
118e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    return apm_->kBadParameterError;
119e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
120e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
121e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  likelihood_ = likelihood;
122e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return Configure();
123e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
124e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
125e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentVoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
126e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return likelihood_;
127e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
128e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
129e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::set_frame_size_ms(int size) {
130e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  CriticalSectionScoped crit_scoped(*apm_->crit());
131e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  assert(size == 10); // TODO(ajm): remove when supported.
132e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  if (size != 10 &&
133e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      size != 20 &&
134e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent      size != 30) {
135e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    return apm_->kBadParameterError;
136e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
137e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
138e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  frame_size_ms_ = size;
139e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
140e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return Initialize();
141e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
142e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
143e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::frame_size_ms() const {
144e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return frame_size_ms_;
145e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
146e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
147e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::Initialize() {
148e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  int err = ProcessingComponent::Initialize();
149e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  if (err != apm_->kNoError || !is_component_enabled()) {
150e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    return err;
151e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
152e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
153e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  using_external_vad_ = false;
154e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
155e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // TODO(ajm): intialize frame buffer here.
156e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
157e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return apm_->kNoError;
158e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
159e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
160e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::get_version(char* version,
161e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent                                    int version_len_bytes) const {
162e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
163e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    return apm_->kBadParameterError;
164e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
165e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
166e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return apm_->kNoError;
167e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
168e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
169e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentvoid* VoiceDetectionImpl::CreateHandle() const {
170e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  Handle* handle = NULL;
171e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  if (WebRtcVad_Create(&handle) != apm_->kNoError) {
172e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    handle = NULL;
173e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  } else {
174e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    assert(handle != NULL);
175e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  }
176e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
177e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return handle;
178e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
179e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
180e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::DestroyHandle(void* handle) const {
181e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return WebRtcVad_Free(static_cast<Handle*>(handle));
182e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
183e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
184e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::InitializeHandle(void* handle) const {
185e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return WebRtcVad_Init(static_cast<Handle*>(handle));
186e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
187e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
188e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::ConfigureHandle(void* handle) const {
189e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return WebRtcVad_set_mode(static_cast<Handle*>(handle),
190e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent                            MapSetting(likelihood_));
191e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
192e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
193e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::num_handles_required() const {
194e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return 1;
195e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
196e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
197e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint VoiceDetectionImpl::GetHandleError(void* handle) const {
198e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  // The VAD has no get_error() function.
199e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  assert(handle != NULL);
200e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent  return apm_->kUnspecifiedError;
201e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}
202e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent}  // namespace webrtc
203