1470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com/* 2152c34cf11927193c0b29ddf2545e392e603aa82bjornv@webrtc.org * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com * 4470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com * Use of this source code is governed by a BSD-style license 5470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com * that can be found in the LICENSE file in the root of the source 6470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com * tree. An additional intellectual property rights grant can be found 7470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com * in the file PATENTS. All contributing project authors may 8470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com * be found in the AUTHORS file in the root of the source tree. 9470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com */ 10470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 11470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 12470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com/* 13470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com * This header file includes the VAD API calls. Specific function calls are given below. 14470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com */ 15470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 16b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 170c6f931420f1e8b697e31810c0b821bbd3d50585bjornv@webrtc.org#define WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ 18470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 19dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting#include <stddef.h> 20dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting 21aa30bb7ef5b02c9026dc2c036a0bed9999ae4cf2pbos@webrtc.org#include "webrtc/typedefs.h" 22470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 23470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.comtypedef struct WebRtcVadInst VadInst; 24470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 25470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com#ifdef __cplusplus 26ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.orgextern "C" { 27470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com#endif 28470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 2926e8a58130160ca2be411ed7ea2097a1d0252090bjornv@webrtc.org// Creates an instance to the VAD structure. 30de4703c5d1290da22feeb708fe915179884e210fBjorn VolckerVadInst* WebRtcVad_Create(); 31470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 3226e8a58130160ca2be411ed7ea2097a1d0252090bjornv@webrtc.org// Frees the dynamic memory of a specified VAD instance. 3326e8a58130160ca2be411ed7ea2097a1d0252090bjornv@webrtc.org// 3426e8a58130160ca2be411ed7ea2097a1d0252090bjornv@webrtc.org// - handle [i] : Pointer to VAD instance that should be freed. 352a796720f8b2b4e6fbcbf58e687781e412e4dc38bjornv@webrtc.orgvoid WebRtcVad_Free(VadInst* handle); 36470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 37ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org// Initializes a VAD instance. 38ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org// 39ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org// - handle [i/o] : Instance that should be initialized. 40ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org// 41ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org// returns : 0 - (OK), 4278f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// -1 - (NULL pointer or Default mode could not be set). 43ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.orgint WebRtcVad_Init(VadInst* handle); 44470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 4578f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more 4678f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// restrictive in reporting speech. Put in other words the probability of being 4778f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// speech when the VAD returns 1 is increased with increasing mode. As a 4878f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// consequence also the missed detection rate goes up. 4978f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// 5078f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// - handle [i/o] : VAD instance. 5178f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// - mode [i] : Aggressiveness mode (0, 1, 2, or 3). 5278f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// 5378f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// returns : 0 - (OK), 5478f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// -1 - (NULL pointer, mode could not be set or the VAD instance 5578f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// has not been initialized). 5678f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.orgint WebRtcVad_set_mode(VadInst* handle, int mode); 57470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 58b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// Calculates a VAD decision for the |audio_frame|. For valid sampling rates 59b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths(). 60b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// 61b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - handle [i/o] : VAD Instance. Needs to be initialized by 62b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// WebRtcVad_Init() before call. 63b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 64b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - audio_frame [i] : Audio frame buffer. 65b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - frame_length [i] : Length of audio frame buffer in number of samples. 66b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// 67b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// returns : 1 - (Active Voice), 68b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// 0 - (Non-active Voice), 69b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// -1 - (Error) 7065f933899b815b6c09f8ca7beefeace09ee3ae70andrew@webrtc.orgint WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, 71dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t frame_length); 72470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 73b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// Checks for valid combinations of |rate| and |frame_length|. We support 10, 74b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. 75b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// 76b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - rate [i] : Sampling frequency (Hz). 77b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - frame_length [i] : Speech frame buffer length in number of samples. 78b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// 79b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// returns : 0 - (valid combination), -1 - (invalid combination) 80dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kastingint WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length); 81b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org 82470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com#ifdef __cplusplus 83470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com} 84470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com#endif 85470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com 86b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org#endif // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 87