1e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/* 2e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 4e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Use of this source code is governed by a BSD-style license 5e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * that can be found in the LICENSE file in the root of the source 6e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * tree. An additional intellectual property rights grant can be found 7e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * in the file PATENTS. All contributing project authors may 8e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * be found in the AUTHORS file in the root of the source tree. 9e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */ 10e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 11e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 12e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/* 13e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * This header file includes the descriptions of the core VAD calls. 14e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */ 15e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 16e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#ifndef WEBRTC_VAD_CORE_H_ 17e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#define WEBRTC_VAD_CORE_H_ 18e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 19e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "typedefs.h" 20e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#include "vad_defines.h" 21e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 22e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurenttypedef struct VadInstT_ 23e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent{ 24e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 25e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 vad; 26e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word32 downsampling_filter_states[4]; 27e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 noise_means[NUM_TABLE_VALUES]; 28e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 speech_means[NUM_TABLE_VALUES]; 29e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 noise_stds[NUM_TABLE_VALUES]; 30e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 speech_stds[NUM_TABLE_VALUES]; 31c55a96383497a772a307b346368133960b02ad03Eric Laurent // TODO(bjornv): Change to |frame_count|. 32e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word32 frame_counter; 33e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 over_hang; // Over Hang 34e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 num_of_speech; 35c55a96383497a772a307b346368133960b02ad03Eric Laurent // TODO(bjornv): Change to |age_vector|. 36e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 index_vector[16 * NUM_CHANNELS]; 37e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS]; 38c55a96383497a772a307b346368133960b02ad03Eric Laurent // TODO(bjornv): Change to |median|. 39e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 mean_value[NUM_CHANNELS]; 40e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 upper_state[5]; 41e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 lower_state[5]; 42e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 hp_filter_state[4]; 43e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 over_hang_max_1[3]; 44e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 over_hang_max_2[3]; 45e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 individual[3]; 46e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 total[3]; 47e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 48e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent short init_flag; 49e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 50e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent} VadInstT; 51e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 52e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/**************************************************************************** 53e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_InitCore(...) 54e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 55e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * This function initializes a VAD instance 56e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 57e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Input: 58e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - inst : Instance that should be initialized 59e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - mode : Aggressiveness degree 60e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 0 (High quality) - 3 (Highly aggressive) 61e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 62e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Output: 63e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - inst : Initialized instance 64e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 65e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Return value : 0 - Ok 66e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * -1 - Error 67e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */ 68e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint WebRtcVad_InitCore(VadInstT* inst, short mode); 69e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 70e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/**************************************************************************** 71e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_set_mode_core(...) 72e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 73e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * This function changes the VAD settings 74e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 75e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Input: 76e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - inst : VAD instance 77e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - mode : Aggressiveness degree 78e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 0 (High quality) - 3 (Highly aggressive) 79e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 80e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Output: 81e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - inst : Changed instance 82e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 83e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Return value : 0 - Ok 84e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * -1 - Error 85e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */ 86e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 87e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurentint WebRtcVad_set_mode_core(VadInstT* inst, short mode); 88e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 89e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/**************************************************************************** 90e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_CalcVad32khz(...) 91e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_CalcVad16khz(...) 92e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_CalcVad8khz(...) 93e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 94e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Calculate probability for active speech and make VAD decision. 95e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 96e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Input: 97e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - inst : Instance that should be initialized 98e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - speech_frame : Input speech frame 99e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - frame_length : Number of input samples 100e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 101e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Output: 102e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - inst : Updated filter states etc. 103e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 104e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Return value : VAD decision 105e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 0 - No active speech 106e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 1-6 - Active speech 107e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */ 108e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame, 109e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent int frame_length); 110e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame, 111e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent int frame_length); 112e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame, 113e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent int frame_length); 114e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 115e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent/**************************************************************************** 116e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * WebRtcVad_GmmProbability(...) 117e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 118e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * This function calculates the probabilities for background noise and 119e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide 120e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * which type of signal is most probable. 121e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 122e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Input: 123e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - inst : Pointer to VAD instance 124e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - feature_vector : Feature vector = log10(energy in frequency band) 125e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - total_power : Total power in frame. 126e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * - frame_length : Number of input samples 127e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 128e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * Output: 129e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * VAD decision : 0 - noise, 1 - speech 130e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent * 131e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent */ 132e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric LaurentWebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector, 133e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent WebRtc_Word16 total_power, int frame_length); 134e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent 135e48d5845c8b35de2ab73ea055c18a61fa3a9f0beEric Laurent#endif // WEBRTC_VAD_CORE_H_ 136