1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/voice_detection_impl.h"
12
13#include "webrtc/common_audio/vad/include/webrtc_vad.h"
14#include "webrtc/modules/audio_processing/audio_buffer.h"
15
16namespace webrtc {
17class VoiceDetectionImpl::Vad {
18 public:
19  Vad() {
20    state_ = WebRtcVad_Create();
21    RTC_CHECK(state_);
22    int error = WebRtcVad_Init(state_);
23    RTC_DCHECK_EQ(0, error);
24  }
25  ~Vad() {
26    WebRtcVad_Free(state_);
27  }
28  VadInst* state() { return state_; }
29 private:
30  VadInst* state_ = nullptr;
31  RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
32};
33
34VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
35    : crit_(crit) {
36  RTC_DCHECK(crit);
37}
38
39VoiceDetectionImpl::~VoiceDetectionImpl() {}
40
41void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
42  rtc::CritScope cs(crit_);
43  sample_rate_hz_ = sample_rate_hz;
44  rtc::scoped_ptr<Vad> new_vad;
45  if (enabled_) {
46    new_vad.reset(new Vad());
47  }
48  vad_.swap(new_vad);
49  using_external_vad_ = false;
50  frame_size_samples_ =
51      static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
52  set_likelihood(likelihood_);
53}
54
55void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
56  rtc::CritScope cs(crit_);
57  if (!enabled_) {
58    return;
59  }
60  if (using_external_vad_) {
61    using_external_vad_ = false;
62    return;
63  }
64
65  RTC_DCHECK_GE(160u, audio->num_frames_per_band());
66  // TODO(ajm): concatenate data in frame buffer here.
67  int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
68                                  audio->mixed_low_pass_data(),
69                                  frame_size_samples_);
70  if (vad_ret == 0) {
71    stream_has_voice_ = false;
72    audio->set_activity(AudioFrame::kVadPassive);
73  } else if (vad_ret == 1) {
74    stream_has_voice_ = true;
75    audio->set_activity(AudioFrame::kVadActive);
76  } else {
77    RTC_NOTREACHED();
78  }
79}
80
81int VoiceDetectionImpl::Enable(bool enable) {
82  rtc::CritScope cs(crit_);
83  if (enabled_ != enable) {
84    enabled_ = enable;
85    Initialize(sample_rate_hz_);
86  }
87  return AudioProcessing::kNoError;
88}
89
90bool VoiceDetectionImpl::is_enabled() const {
91  rtc::CritScope cs(crit_);
92  return enabled_;
93}
94
95int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
96  rtc::CritScope cs(crit_);
97  using_external_vad_ = true;
98  stream_has_voice_ = has_voice;
99  return AudioProcessing::kNoError;
100}
101
102bool VoiceDetectionImpl::stream_has_voice() const {
103  rtc::CritScope cs(crit_);
104  // TODO(ajm): enable this assertion?
105  //assert(using_external_vad_ || is_component_enabled());
106  return stream_has_voice_;
107}
108
109int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
110  rtc::CritScope cs(crit_);
111  likelihood_ = likelihood;
112  if (enabled_) {
113    int mode = 2;
114    switch (likelihood) {
115      case VoiceDetection::kVeryLowLikelihood:
116        mode = 3;
117        break;
118      case VoiceDetection::kLowLikelihood:
119        mode = 2;
120        break;
121      case VoiceDetection::kModerateLikelihood:
122        mode = 1;
123        break;
124      case VoiceDetection::kHighLikelihood:
125        mode = 0;
126        break;
127      default:
128        RTC_NOTREACHED();
129        break;
130    }
131    int error = WebRtcVad_set_mode(vad_->state(), mode);
132    RTC_DCHECK_EQ(0, error);
133  }
134  return AudioProcessing::kNoError;
135}
136
137VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
138  rtc::CritScope cs(crit_);
139  return likelihood_;
140}
141
142int VoiceDetectionImpl::set_frame_size_ms(int size) {
143  rtc::CritScope cs(crit_);
144  RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
145  frame_size_ms_ = size;
146  Initialize(sample_rate_hz_);
147  return AudioProcessing::kNoError;
148}
149
150int VoiceDetectionImpl::frame_size_ms() const {
151  rtc::CritScope cs(crit_);
152  return frame_size_ms_;
153}
154}  // namespace webrtc
155