/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This header file includes the VAD API calls. Specific function calls are given below.
 */

16b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
170c6f931420f1e8b697e31810c0b821bbd3d50585bjornv@webrtc.org#define WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
18470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
19dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting#include <stddef.h>
20dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting
21aa30bb7ef5b02c9026dc2c036a0bed9999ae4cf2pbos@webrtc.org#include "webrtc/typedefs.h"
22470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
23470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.comtypedef struct WebRtcVadInst VadInst;
24470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
25470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com#ifdef __cplusplus
26ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.orgextern "C" {
27470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com#endif
28470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
2926e8a58130160ca2be411ed7ea2097a1d0252090bjornv@webrtc.org// Creates an instance to the VAD structure.
30de4703c5d1290da22feeb708fe915179884e210fBjorn VolckerVadInst* WebRtcVad_Create();
31470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
3226e8a58130160ca2be411ed7ea2097a1d0252090bjornv@webrtc.org// Frees the dynamic memory of a specified VAD instance.
3326e8a58130160ca2be411ed7ea2097a1d0252090bjornv@webrtc.org//
3426e8a58130160ca2be411ed7ea2097a1d0252090bjornv@webrtc.org// - handle [i] : Pointer to VAD instance that should be freed.
352a796720f8b2b4e6fbcbf58e687781e412e4dc38bjornv@webrtc.orgvoid WebRtcVad_Free(VadInst* handle);
36470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
37ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org// Initializes a VAD instance.
38ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org//
39ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org// - handle [i/o] : Instance that should be initialized.
40ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org//
41ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.org// returns        : 0 - (OK),
4278f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org//                 -1 - (NULL pointer or Default mode could not be set).
43ed700db014a5dc533a7dc7b70630863847956792bjornv@webrtc.orgint WebRtcVad_Init(VadInst* handle);
44470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
4578f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
4678f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// restrictive in reporting speech. Put in other words the probability of being
4778f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// speech when the VAD returns 1 is increased with increasing mode. As a
4878f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// consequence also the missed detection rate goes up.
4978f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org//
5078f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// - handle [i/o] : VAD instance.
5178f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
5278f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org//
5378f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org// returns        : 0 - (OK),
5478f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org//                 -1 - (NULL pointer, mode could not be set or the VAD instance
5578f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.org//                       has not been initialized).
5678f0cdc1911a18b2c64f9a5815226be2c8dea6bfbjornv@webrtc.orgint WebRtcVad_set_mode(VadInst* handle, int mode);
57470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
58b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
59b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths().
60b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org//
61b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - handle       [i/o] : VAD Instance. Needs to be initialized by
62b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org//                        WebRtcVad_Init() before call.
63b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
64b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - audio_frame  [i]   : Audio frame buffer.
65b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - frame_length [i]   : Length of audio frame buffer in number of samples.
66b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org//
67b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// returns              : 1 - (Active Voice),
68b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org//                        0 - (Non-active Voice),
69b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org//                       -1 - (Error)
7065f933899b815b6c09f8ca7beefeace09ee3ae70andrew@webrtc.orgint WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame,
71dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting                      size_t frame_length);
72470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
73b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// Checks for valid combinations of |rate| and |frame_length|. We support 10,
74b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
75b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org//
76b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - rate         [i] : Sampling frequency (Hz).
77b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// - frame_length [i] : Speech frame buffer length in number of samples.
78b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org//
79b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org// returns            : 0 - (valid combination), -1 - (invalid combination)
80dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kastingint WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
81b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org
82470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com#ifdef __cplusplus
83470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com}
84470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com#endif
85470e71d3649f6cac4688e83819640b012b5d38bbniklase@google.com
86b1c3276f5a64741a09fd28f063a5ca34ed9473cfbjornv@webrtc.org#endif  // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
87