/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This header file includes the descriptions of the core VAD calls.
 */

#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
#define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_

#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"

// Table dimensions and thresholds for the VAD. Anonymous enums are used so
// the values are integer constant expressions usable as array sizes in C.
enum { kNumChannels = 6 };  // Number of frequency bands (named channels).
enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
// One table entry per (channel, Gaussian) pair.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.

27b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgtypedef struct VadInstT_
28b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org{
29b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
30b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int vad;
31b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int32_t downsampling_filter_states[4];
32b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    WebRtcSpl_State48khzTo8khz state_48_to_8;
33b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t noise_means[kTableSize];
34b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t speech_means[kTableSize];
35b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t noise_stds[kTableSize];
36b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t speech_stds[kTableSize];
37b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // TODO(bjornv): Change to |frame_count|.
38b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int32_t frame_counter;
39b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t over_hang; // Over Hang
40b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t num_of_speech;
41b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // TODO(bjornv): Change to |age_vector|.
42b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t index_vector[16 * kNumChannels];
43b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t low_value_vector[16 * kNumChannels];
44b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    // TODO(bjornv): Change to |median|.
45b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t mean_value[kNumChannels];
46b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t upper_state[5];
47b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t lower_state[5];
48b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t hp_filter_state[4];
49b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t over_hang_max_1[3];
50b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t over_hang_max_2[3];
51b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t individual[3];
52b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int16_t total[3];
53b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
54b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org    int init_flag;
55b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
56b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org} VadInstT;
57b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
58b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// Initializes the core VAD component. The default aggressiveness mode is
59b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// controlled by |kDefaultMode| in vad_core.c.
60b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org//
61b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// - self [i/o] : Instance that should be initialized
62b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org//
63b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org// returns      : 0 (OK), -1 (NULL pointer in or if the default mode can't be
64b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org//                set)
65b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint WebRtcVad_InitCore(VadInstT* self);
66b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
67b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org/****************************************************************************
68b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * WebRtcVad_set_mode_core(...)
69b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *
70b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * This function changes the VAD settings
71b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *
72b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Input:
73b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *      - inst      : VAD instance
74b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *      - mode      : Aggressiveness degree
75b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *                    0 (High quality) - 3 (Highly aggressive)
76b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *
77b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Output:
78b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *      - inst      : Changed  instance
79b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *
80b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Return value     :  0 - Ok
81b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *                    -1 - Error
82b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org */
83b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
84b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.orgint WebRtcVad_set_mode_core(VadInstT* self, int mode);
85b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
86b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org/****************************************************************************
87b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * WebRtcVad_CalcVad48khz(...)
88785c2fdd22c9ad2d172c0976d1224bbe44073bc7andrew@webrtc.org * WebRtcVad_CalcVad32khz(...)
89785c2fdd22c9ad2d172c0976d1224bbe44073bc7andrew@webrtc.org * WebRtcVad_CalcVad16khz(...)
90785c2fdd22c9ad2d172c0976d1224bbe44073bc7andrew@webrtc.org * WebRtcVad_CalcVad8khz(...)
91b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *
92b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Calculate probability for active speech and make VAD decision.
93b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *
94b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Input:
95b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *      - inst          : Instance that should be initialized
96b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *      - speech_frame  : Input speech frame
97b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *      - frame_length  : Number of input samples
98b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *
99b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Output:
100b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *      - inst          : Updated filter states etc.
101b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *
102b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org * Return value         : VAD decision
103b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *                        0 - No active speech
104b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org *                        1-6 - Active speech
105b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org */
106785c2fdd22c9ad2d172c0976d1224bbe44073bc7andrew@webrtc.orgint WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
107b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                           int frame_length);
108785c2fdd22c9ad2d172c0976d1224bbe44073bc7andrew@webrtc.orgint WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
109b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                           int frame_length);
110785c2fdd22c9ad2d172c0976d1224bbe44073bc7andrew@webrtc.orgint WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
111b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                           int frame_length);
112785c2fdd22c9ad2d172c0976d1224bbe44073bc7andrew@webrtc.orgint WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
113b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org                          int frame_length);
114b015cbede88899f67a53fbbe581b02ce8e32794andrew@webrtc.org
#endif  // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_