/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This header file declares the core VAD instance structure and the core
 * VAD functions that operate on it.
 */

16e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#ifndef WEBRTC_VAD_CORE_H_
17e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#define WEBRTC_VAD_CORE_H_
18e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
19e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "typedefs.h"
20e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "vad_defines.h"
21e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
22e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurenttypedef struct VadInstT_
23e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent{
24e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
25e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 vad;
26e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word32 downsampling_filter_states[4];
27e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 noise_means[NUM_TABLE_VALUES];
28e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 speech_means[NUM_TABLE_VALUES];
29e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 noise_stds[NUM_TABLE_VALUES];
30e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 speech_stds[NUM_TABLE_VALUES];
31c55a96383497a772a307b346368133960b02ad03Eric Laurent    // TODO(bjornv): Change to |frame_count|.
32e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word32 frame_counter;
33e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 over_hang; // Over Hang
34e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 num_of_speech;
35c55a96383497a772a307b346368133960b02ad03Eric Laurent    // TODO(bjornv): Change to |age_vector|.
36e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 index_vector[16 * NUM_CHANNELS];
37e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS];
38c55a96383497a772a307b346368133960b02ad03Eric Laurent    // TODO(bjornv): Change to |median|.
39e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 mean_value[NUM_CHANNELS];
40e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 upper_state[5];
41e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 lower_state[5];
42e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 hp_filter_state[4];
43e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 over_hang_max_1[3];
44e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 over_hang_max_2[3];
45e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 individual[3];
46e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    WebRtc_Word16 total[3];
47e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
48e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent    short init_flag;
49e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
50e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} VadInstT;
51e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
52e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/****************************************************************************
53e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_InitCore(...)
54e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
55e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * This function initializes a VAD instance
56e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
57e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Input:
58e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - inst      : Instance that should be initialized
59e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - mode      : Aggressiveness degree
60e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *                    0 (High quality) - 3 (Highly aggressive)
61e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
62e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Output:
63e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - inst      : Initialized instance
64e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
65e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Return value     :  0 - Ok
66e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *                    -1 - Error
67e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */
68e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint WebRtcVad_InitCore(VadInstT* inst, short mode);
69e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
70e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/****************************************************************************
71e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_set_mode_core(...)
72e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
73e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * This function changes the VAD settings
74e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
75e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Input:
76e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - inst      : VAD instance
77e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - mode      : Aggressiveness degree
78e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *                    0 (High quality) - 3 (Highly aggressive)
79e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
80e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Output:
81e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - inst      : Changed  instance
82e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
83e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Return value     :  0 - Ok
84e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *                    -1 - Error
85e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */
86e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
87e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint WebRtcVad_set_mode_core(VadInstT* inst, short mode);
88e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
89e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/****************************************************************************
90e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_CalcVad32khz(...)
91e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_CalcVad16khz(...)
92e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_CalcVad8khz(...)
93e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
94e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Calculate probability for active speech and make VAD decision.
95e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
96e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Input:
97e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - inst          : Instance that should be initialized
98e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - speech_frame  : Input speech frame
99e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - frame_length  : Number of input samples
100e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
101e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Output:
102e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - inst          : Updated filter states etc.
103e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
104e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Return value         : VAD decision
105e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *                        0 - No active speech
106e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *                        1-6 - Active speech
107e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */
108e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
109e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent                                     int frame_length);
110e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
111e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent                                     int frame_length);
112e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
113e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent                                    int frame_length);
114e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
115e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/****************************************************************************
116e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_GmmProbability(...)
117e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
118e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * This function calculates the probabilities for background noise and
119e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide
120e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * which type of signal is most probable.
121e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
122e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Input:
123e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - inst              : Pointer to VAD instance
124e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - feature_vector    : Feature vector = log10(energy in frequency band)
125e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - total_power       : Total power in frame.
126e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      - frame_length      : Number of input samples
127e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
128e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Output:
129e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *      VAD decision        : 0 - noise, 1 - speech
130e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent *
131e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */
132e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector,
133e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent                                       WebRtc_Word16 total_power, int frame_length);
134e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent
135e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#endif // WEBRTC_VAD_CORE_H_
136