/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This header file includes the descriptions of the core VAD calls.
 */

16#ifndef WEBRTC_VAD_CORE_H_
17#define WEBRTC_VAD_CORE_H_
18
19#include "typedefs.h"
20#include "vad_defines.h"
21
22typedef struct VadInstT_
23{
24
25    WebRtc_Word16 vad;
26    WebRtc_Word32 downsampling_filter_states[4];
27    WebRtc_Word16 noise_means[NUM_TABLE_VALUES];
28    WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
29    WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
30    WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
31    // TODO(bjornv): Change to |frame_count|.
32    WebRtc_Word32 frame_counter;
33    WebRtc_Word16 over_hang; // Over Hang
34    WebRtc_Word16 num_of_speech;
35    // TODO(bjornv): Change to |age_vector|.
36    WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
37    WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
38    // TODO(bjornv): Change to |median|.
39    WebRtc_Word16 mean_value[NUM_CHANNELS];
40    WebRtc_Word16 upper_state[5];
41    WebRtc_Word16 lower_state[5];
42    WebRtc_Word16 hp_filter_state[4];
43    WebRtc_Word16 over_hang_max_1[3];
44    WebRtc_Word16 over_hang_max_2[3];
45    WebRtc_Word16 individual[3];
46    WebRtc_Word16 total[3];
47
48    short init_flag;
49
50} VadInstT;
51
52/****************************************************************************
53 * WebRtcVad_InitCore(...)
54 *
55 * This function initializes a VAD instance
56 *
57 * Input:
58 *      - inst      : Instance that should be initialized
59 *      - mode      : Aggressiveness degree
60 *                    0 (High quality) - 3 (Highly aggressive)
61 *
62 * Output:
63 *      - inst      : Initialized instance
64 *
65 * Return value     :  0 - Ok
66 *                    -1 - Error
67 */
68int WebRtcVad_InitCore(VadInstT* inst, short mode);
69
70/****************************************************************************
71 * WebRtcVad_set_mode_core(...)
72 *
73 * This function changes the VAD settings
74 *
75 * Input:
76 *      - inst      : VAD instance
77 *      - mode      : Aggressiveness degree
78 *                    0 (High quality) - 3 (Highly aggressive)
79 *
80 * Output:
81 *      - inst      : Changed  instance
82 *
83 * Return value     :  0 - Ok
84 *                    -1 - Error
85 */
86
87int WebRtcVad_set_mode_core(VadInstT* inst, short mode);
88
89/****************************************************************************
90 * WebRtcVad_CalcVad32khz(...)
91 * WebRtcVad_CalcVad16khz(...)
92 * WebRtcVad_CalcVad8khz(...)
93 *
94 * Calculate probability for active speech and make VAD decision.
95 *
96 * Input:
97 *      - inst          : Instance that should be initialized
98 *      - speech_frame  : Input speech frame
99 *      - frame_length  : Number of input samples
100 *
101 * Output:
102 *      - inst          : Updated filter states etc.
103 *
104 * Return value         : VAD decision
105 *                        0 - No active speech
106 *                        1-6 - Active speech
107 */
108WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
109                                     int frame_length);
110WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
111                                     int frame_length);
112WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
113                                    int frame_length);
114
115/****************************************************************************
116 * WebRtcVad_GmmProbability(...)
117 *
118 * This function calculates the probabilities for background noise and
119 * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide
120 * which type of signal is most probable.
121 *
122 * Input:
123 *      - inst              : Pointer to VAD instance
124 *      - feature_vector    : Feature vector = log10(energy in frequency band)
125 *      - total_power       : Total power in frame.
126 *      - frame_length      : Number of input samples
127 *
128 * Output:
129 *      VAD decision        : 0 - noise, 1 - speech
130 *
131 */
132WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector,
133                                       WebRtc_Word16 total_power, int frame_length);
134
135#endif // WEBRTC_VAD_CORE_H_
136