1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
12
13#include <assert.h>
14#include <math.h>
15#include <string.h>
16
17#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
18#include "webrtc/modules/audio_processing/vad/common.h"
19#include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"
20#include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"
21#include "webrtc/modules/include/module_common_types.h"
22
23namespace webrtc {
24
25static_assert(kNoiseGmmDim == kVoiceGmmDim,
26              "noise and voice gmm dimension not equal");
27
// Tuning constants. Keep them identical to the MATLAB reference
// implementation, otherwise the unit-tests will not pass.
namespace {

// Number of posteriors kept for the prior estimate: 5 sec of 10 ms frames.
constexpr int kPosteriorHistorySize = 500;
// Prior voicing probability used before any history has been collected.
constexpr double kInitialPriorProbability = 0.3;
// Arguments forwarded to VadCircularBuffer::RemoveTransient().
constexpr int kTransientWidthThreshold = 7;
constexpr double kLowProbabilityThreshold = 0.2;

}  // namespace
33
// Clamps |p| to the closed interval [0.01, 0.99] so that a probability never
// saturates at exactly 0 or 1. Values already inside the interval (and NaN,
// which compares false against both bounds) are returned unchanged.
static double LimitProbability(double p) {
  const double kFloor = 0.01;
  const double kCeiling = 0.99;

  if (p < kFloor)
    return kFloor;
  if (p > kCeiling)
    return kCeiling;
  return p;
}
44
45PitchBasedVad::PitchBasedVad()
46    : p_prior_(kInitialPriorProbability),
47      circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
48  // Setup noise GMM.
49  noise_gmm_.dimension = kNoiseGmmDim;
50  noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
51  noise_gmm_.weight = kNoiseGmmWeights;
52  noise_gmm_.mean = &kNoiseGmmMean[0][0];
53  noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
54
55  // Setup voice GMM.
56  voice_gmm_.dimension = kVoiceGmmDim;
57  voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
58  voice_gmm_.weight = kVoiceGmmWeights;
59  voice_gmm_.mean = &kVoiceGmmMean[0][0];
60  voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
61}
62
63PitchBasedVad::~PitchBasedVad() {
64}
65
66int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
67                                      double* p_combined) {
68  double p;
69  double gmm_features[3];
70  double pdf_features_given_voice;
71  double pdf_features_given_noise;
72  // These limits are the same in matlab implementation 'VoicingProbGMM().'
73  const double kLimLowLogPitchGain = -2.0;
74  const double kLimHighLogPitchGain = -0.9;
75  const double kLimLowSpectralPeak = 200;
76  const double kLimHighSpectralPeak = 2000;
77  const double kEps = 1e-12;
78  for (size_t n = 0; n < features.num_frames; n++) {
79    gmm_features[0] = features.log_pitch_gain[n];
80    gmm_features[1] = features.spectral_peak[n];
81    gmm_features[2] = features.pitch_lag_hz[n];
82
83    pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
84    pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
85
86    if (features.spectral_peak[n] < kLimLowSpectralPeak ||
87        features.spectral_peak[n] > kLimHighSpectralPeak ||
88        features.log_pitch_gain[n] < kLimLowLogPitchGain) {
89      pdf_features_given_voice = kEps * pdf_features_given_noise;
90    } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
91      pdf_features_given_noise = kEps * pdf_features_given_voice;
92    }
93
94    p = p_prior_ * pdf_features_given_voice /
95        (pdf_features_given_voice * p_prior_ +
96         pdf_features_given_noise * (1 - p_prior_));
97
98    p = LimitProbability(p);
99
100    // Combine pitch-based probability with standalone probability, before
101    // updating prior probabilities.
102    double prod_active = p * p_combined[n];
103    double prod_inactive = (1 - p) * (1 - p_combined[n]);
104    p_combined[n] = prod_active / (prod_active + prod_inactive);
105
106    if (UpdatePrior(p_combined[n]) < 0)
107      return -1;
108    // Limit prior probability. With a zero prior probability the posterior
109    // probability is always zero.
110    p_prior_ = LimitProbability(p_prior_);
111  }
112  return 0;
113}
114
115int PitchBasedVad::UpdatePrior(double p) {
116  circular_buffer_->Insert(p);
117  if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
118                                        kLowProbabilityThreshold) < 0)
119    return -1;
120  p_prior_ = circular_buffer_->Mean();
121  return 0;
122}
123
124}  // namespace webrtc
125