vad_gmm.c revision 3fbf99c44a30f09d4e3402e192067d053ced5c55
1e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov/*
2e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov *
4ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann *  Use of this source code is governed by a BSD-style license
5e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov *  that can be found in the LICENSE file in the root of the source
6e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov *  tree. An additional intellectual property rights grant can be found
7e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov *  in the file PATENTS.  All contributing project authors may
8e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov *  be found in the AUTHORS file in the root of the source tree.
9e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov */
10ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
11ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "webrtc/common_audio/vad/vad_gmm.h"
12ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
13ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
14ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann#include "webrtc/typedefs.h"
15ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
16e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganovstatic const int32_t kCompVar = 22005;
17ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannstatic const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12.
18ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
19ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// For a normal distribution, the probability of |input| is calculated and
20ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// returned (in Q20). The formula for normal distributed probability is
21ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann//
22ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// 1 / s * exp(-(x - m)^2 / (2 * s^2))
23ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann//
24ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// where the parameters are given in the following Q domains:
25ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// m = |mean| (Q7)
26ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// s = |std| (Q7)
27ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// x = |input| (Q4)
28ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// in addition to the probability we output |delta| (in Q11) used when updating
29ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann// the noise/speech model.
30ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmannint32_t WebRtcVad_GaussianProbability(int16_t input,
31ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                                      int16_t mean,
32ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                                      int16_t std,
33ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann                                      int16_t* delta) {
34ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int16_t tmp16, inv_std, inv_std2, exp_value = 0;
35ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  int32_t tmp32;
36ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
37ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Calculate |inv_std| = 1 / s, in Q10.
38e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov  // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation.
39ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Q-domain: Q17 / Q7 = Q10.
40ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  tmp32 = (int32_t) 131072 + (int32_t) (std >> 1);
41ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std);
42ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
43ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Calculate |inv_std2| = 1 / s^2, in Q14.
44ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  tmp16 = (inv_std >> 2);  // Q10 -> Q8.
45ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Q-domain: (Q8 * Q8) >> 2 = Q14.
46ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  inv_std2 = (int16_t)((tmp16 * tmp16) >> 2);
47ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // TODO(bjornv): Investigate if changing to
48ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // inv_std2 = (int16_t)((inv_std * inv_std) >> 6);
49ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // gives better accuracy.
50ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
51ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  tmp16 = (input << 3);  // Q4 -> Q7
52ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  tmp16 = tmp16 - mean;  // Q7 - Q7 = Q7
53ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
54ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // To be used later, when updating noise/speech model.
55ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // |delta| = (x - m) / s^2, in Q11.
56ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Q-domain: (Q14 * Q7) >> 10 = Q11.
57e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov  *delta = (int16_t)((inv_std2 * tmp16) >> 10);
58ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
59ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing
60ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // division by two with one shift.
61ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Q-domain: (Q11 * Q7) >> 8 = Q10.
62ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  tmp32 = (*delta * tmp16) >> 9;
63ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
64ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // If the exponent is small enough to give a non-zero probability we calculate
65ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2))
66ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  //             ~= exp2(-log2(exp(1)) * |tmp32|).
67ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  if (tmp32 < kCompVar) {
68ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10.
69ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    // Q-domain: (Q12 * Q10) >> 12 = Q10.
70e6986e1e8d4a57987f47c215490cb080a65ee29aSvet Ganov    tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12);
71ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    tmp16 = -tmp16;
72ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    exp_value = (0x0400 | (tmp16 & 0x03FF));
73ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    tmp16 ^= 0xFFFF;
74ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    tmp16 >>= 10;
75ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    tmp16 += 1;
76ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    // Get |exp_value| = exp(-|tmp32|) in Q10.
77ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann    exp_value >>= tmp16;
78ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  }
79ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann
80ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20.
81ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  // Q-domain: Q10 * Q10 = Q20.
82ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann  return inv_std * exp_value;
83ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann}
84ac3d58cff7c80b0ef56bf55130d91da17cbaa3c4Philip P. Moltmann