1/*
2 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
12
13#include <algorithm>
14
15#include "webrtc/base/checks.h"
16
17namespace webrtc {
namespace {

// Largest chunk, in samples, that ProcessChunk() accepts (DCHECKed there).
constexpr size_t kMaxLength = 320;
// Channel count handed to the resampler when it is (re)configured.
constexpr size_t kNumChannels = 1;

// Initial value of |last_voice_probability_| before any chunk is processed.
constexpr double kDefaultVoiceValue = 1.0;
// Starting value written to every per-frame probability before the VADs
// refine it.
constexpr double kNeutralProbability = 0.5;
// Probability assigned to every frame when the extracted features are flagged
// as silence.
constexpr double kLowProbability = 0.01;

}  // namespace
28
29VoiceActivityDetector::VoiceActivityDetector()
30    : last_voice_probability_(kDefaultVoiceValue),
31      standalone_vad_(StandaloneVad::Create()) {
32}
33
34// Because ISAC has a different chunk length, it updates
35// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
36// Otherwise it clears them.
37void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
38                                         size_t length,
39                                         int sample_rate_hz) {
40  RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100);
41  RTC_DCHECK_LE(length, kMaxLength);
42  // Resample to the required rate.
43  const int16_t* resampled_ptr = audio;
44  if (sample_rate_hz != kSampleRateHz) {
45    RTC_CHECK_EQ(
46        resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
47        0);
48    resampler_.Push(audio, length, resampled_, kLength10Ms, length);
49    resampled_ptr = resampled_;
50  }
51  RTC_DCHECK_EQ(length, kLength10Ms);
52
53  // Each chunk needs to be passed into |standalone_vad_|, because internally it
54  // buffers the audio and processes it all at once when GetActivity() is
55  // called.
56  RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
57
58  audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
59
60  chunkwise_voice_probabilities_.resize(features_.num_frames);
61  chunkwise_rms_.resize(features_.num_frames);
62  std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
63            chunkwise_rms_.begin());
64  if (features_.num_frames > 0) {
65    if (features_.silence) {
66      // The other features are invalid, so set the voice probabilities to an
67      // arbitrary low value.
68      std::fill(chunkwise_voice_probabilities_.begin(),
69                chunkwise_voice_probabilities_.end(), kLowProbability);
70    } else {
71      std::fill(chunkwise_voice_probabilities_.begin(),
72                chunkwise_voice_probabilities_.end(), kNeutralProbability);
73      RTC_CHECK_GE(
74          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
75                                       chunkwise_voice_probabilities_.size()),
76          0);
77      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
78                       features_, &chunkwise_voice_probabilities_[0]),
79                   0);
80    }
81    last_voice_probability_ = chunkwise_voice_probabilities_.back();
82  }
83}
84
85}  // namespace webrtc
86