15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/*
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  Use of this source code is governed by a BSD-style license
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  that can be found in the LICENSE file in the root of the source
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  tree. An additional intellectual property rights grant can be found
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  in the file PATENTS.  All contributing project authors may
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *  be found in the AUTHORS file in the root of the source tree.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) */
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stddef.h>  // size_t
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdlib.h>
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "gtest/gtest.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "typedefs.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "webrtc_vad.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TODO(bjornv): Move the internal unit tests to separate files.
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)extern "C" {
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "vad_core.h"
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "vad_gmm.h"
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "vad_sp.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace webrtc {
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
// Modes handed to WebRtcVad_set_mode() by the tests, in increasing order.
const int16_t kModes[] = { 0, 1, 2, 3 };
const size_t kModesSize = sizeof(kModes) / sizeof(kModes[0]);

// Sampling rates fed to WebRtcVad_Process() by the tests. Not every entry is
// a valid rate; ValidRatesAndFrameLengths() decides which ones must succeed.
const int16_t kRates[] = { 8000, 12000, 16000, 24000, 32000 };
const size_t kRatesSize = sizeof(kRates) / sizeof(kRates[0]);

// Frame lengths fed to WebRtcVad_Process() by the tests. The largest one also
// sizes the sample buffers used by the tests.
const int16_t kMaxFrameLength = 960;
const int16_t kFrameLengths[] = {
    80, 120, 160, 240, 320, 480, 640, kMaxFrameLength
};
const size_t kFrameLengthsSize =
    sizeof(kFrameLengths) / sizeof(kFrameLengths[0]);
38
// Tells whether |frame_length| is an accepted frame length for the sampling
// |rate|. Only the rates 8000, 16000 and 32000 are accepted, each with exactly
// three frame lengths; every other combination yields false.
bool ValidRatesAndFrameLengths(int16_t rate, int16_t frame_length) {
  switch (rate) {
    case 8000:
      return frame_length == 80 || frame_length == 160 || frame_length == 240;
    case 16000:
      return frame_length == 160 || frame_length == 320 ||
             frame_length == 480;
    case 32000:
      return frame_length == 320 || frame_length == 640 ||
             frame_length == 960;
    default:
      return false;
  }
}
61
62class VadTest : public ::testing::Test {
63 protected:
64  VadTest();
65  virtual void SetUp();
66  virtual void TearDown();
67};
68
69VadTest::VadTest() {
70}
71
72void VadTest::SetUp() {
73}
74
75void VadTest::TearDown() {
76}
77
78TEST_F(VadTest, ApiTest) {
79  // This API test runs through the APIs for all possible valid and invalid
80  // combinations.
81
82  VadInst* handle = NULL;
83  int16_t zeros[kMaxFrameLength] = { 0 };
84
85  // Construct a speech signal that will trigger the VAD in all modes. It is
86  // known that (i * i) will wrap around, but that doesn't matter in this case.
87  int16_t speech[kMaxFrameLength];
88  for (int16_t i = 0; i < kMaxFrameLength; i++) {
89    speech[i] = (i * i);
90  }
91
92  // WebRtcVad_get_version() tests
93  char version[32];
94  EXPECT_EQ(-1, WebRtcVad_get_version(NULL, sizeof(version)));
95  EXPECT_EQ(-1, WebRtcVad_get_version(version, 1));
96  EXPECT_EQ(0, WebRtcVad_get_version(version, sizeof(version)));
97
98  // Null instance tests
99  EXPECT_EQ(-1, WebRtcVad_Create(NULL));
100  EXPECT_EQ(-1, WebRtcVad_Init(NULL));
101  EXPECT_EQ(-1, WebRtcVad_Assign(NULL, NULL));
102  EXPECT_EQ(-1, WebRtcVad_Free(NULL));
103  EXPECT_EQ(-1, WebRtcVad_set_mode(NULL, kModes[0]));
104  EXPECT_EQ(-1, WebRtcVad_Process(NULL, kRates[0], speech, kFrameLengths[0]));
105
106  // WebRtcVad_AssignSize tests
107  int handle_size_bytes = 0;
108  EXPECT_EQ(0, WebRtcVad_AssignSize(&handle_size_bytes));
109  EXPECT_EQ(576, handle_size_bytes);
110
111  // WebRtcVad_Assign tests
112  void* tmp_handle = malloc(handle_size_bytes);
113  EXPECT_EQ(-1, WebRtcVad_Assign(&handle, NULL));
114  EXPECT_EQ(0, WebRtcVad_Assign(&handle, tmp_handle));
115  EXPECT_EQ(handle, tmp_handle);
116  free(tmp_handle);
117
118  // WebRtcVad_Create()
119  ASSERT_EQ(0, WebRtcVad_Create(&handle));
120
121  // Not initialized tests
122  EXPECT_EQ(-1, WebRtcVad_Process(handle, kRates[0], speech, kFrameLengths[0]));
123  EXPECT_EQ(-1, WebRtcVad_set_mode(handle, kModes[0]));
124
125  // WebRtcVad_Init() test
126  ASSERT_EQ(0, WebRtcVad_Init(handle));
127
128  // WebRtcVad_set_mode() invalid modes tests
129  EXPECT_EQ(-1, WebRtcVad_set_mode(handle, kModes[0] - 1));
130  EXPECT_EQ(-1, WebRtcVad_set_mode(handle, kModes[kModesSize - 1] + 1));
131
132  // WebRtcVad_Process() tests
133  // NULL speech pointer
134  EXPECT_EQ(-1, WebRtcVad_Process(handle, kRates[0], NULL, kFrameLengths[0]));
135  // Invalid sampling rate
136  EXPECT_EQ(-1, WebRtcVad_Process(handle, 9999, speech, kFrameLengths[0]));
137  // All zeros as input should work
138  EXPECT_EQ(0, WebRtcVad_Process(handle, kRates[0], zeros, kFrameLengths[0]));
139  for (size_t k = 0; k < kModesSize; k++) {
140    // Test valid modes
141    EXPECT_EQ(0, WebRtcVad_set_mode(handle, kModes[k]));
142    // Loop through sampling rate and frame length combinations
143    for (size_t i = 0; i < kRatesSize; i++) {
144      for (size_t j = 0; j < kFrameLengthsSize; j++) {
145        if (ValidRatesAndFrameLengths(kRates[i], kFrameLengths[j])) {
146          EXPECT_EQ(1, WebRtcVad_Process(handle,
147                                         kRates[i],
148                                         speech,
149                                         kFrameLengths[j]));
150        } else {
151          EXPECT_EQ(-1, WebRtcVad_Process(handle,
152                                          kRates[i],
153                                          speech,
154                                          kFrameLengths[j]));
155        }
156      }
157    }
158  }
159
160  EXPECT_EQ(0, WebRtcVad_Free(handle));
161}
162
163TEST_F(VadTest, GMMTests) {
164  int16_t delta = 0;
165  // Input value at mean.
166  EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(0, 0, 128, &delta));
167  EXPECT_EQ(0, delta);
168  EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(16, 128, 128, &delta));
169  EXPECT_EQ(0, delta);
170  EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(-16, -128, 128, &delta));
171  EXPECT_EQ(0, delta);
172
173  // Largest possible input to give non-zero probability.
174  EXPECT_EQ(1024, WebRtcVad_GaussianProbability(59, 0, 128, &delta));
175  EXPECT_EQ(7552, delta);
176  EXPECT_EQ(1024, WebRtcVad_GaussianProbability(75, 128, 128, &delta));
177  EXPECT_EQ(7552, delta);
178  EXPECT_EQ(1024, WebRtcVad_GaussianProbability(-75, -128, 128, &delta));
179  EXPECT_EQ(-7552, delta);
180
181  // Too large input, should give zero probability.
182  EXPECT_EQ(0, WebRtcVad_GaussianProbability(105, 0, 128, &delta));
183  EXPECT_EQ(13440, delta);
184}
185
186TEST_F(VadTest, SPTests) {
187  VadInstT* handle = (VadInstT*) malloc(sizeof(VadInstT));
188  int16_t zeros[kMaxFrameLength] = { 0 };
189  int32_t state[2] = { 0 };
190  int16_t data_in[kMaxFrameLength];
191  int16_t data_out[kMaxFrameLength];
192
193  const int16_t kReferenceMin[32] = {
194      1600, 720, 509, 512, 532, 552, 570, 588,
195      606, 624, 642, 659, 675, 691, 707, 723,
196      1600, 544, 502, 522, 542, 561, 579, 597,
197      615, 633, 651, 667, 683, 699, 715, 731
198  };
199
200  // Construct a speech signal that will trigger the VAD in all modes. It is
201  // known that (i * i) will wrap around, but that doesn't matter in this case.
202  for (int16_t i = 0; i < kMaxFrameLength; ++i) {
203    data_in[i] = (i * i);
204  }
205  // Input values all zeros, expect all zeros out.
206  WebRtcVad_Downsampling(zeros, data_out, state, (int) kMaxFrameLength);
207  EXPECT_EQ(0, state[0]);
208  EXPECT_EQ(0, state[1]);
209  for (int16_t i = 0; i < kMaxFrameLength / 2; ++i) {
210    EXPECT_EQ(0, data_out[i]);
211  }
212  // Make a simple non-zero data test.
213  WebRtcVad_Downsampling(data_in, data_out, state, (int) kMaxFrameLength);
214  EXPECT_EQ(207, state[0]);
215  EXPECT_EQ(2270, state[1]);
216
217  ASSERT_EQ(0, WebRtcVad_InitCore(handle, 0));
218  for (int16_t i = 0; i < 16; ++i) {
219    int16_t value = 500 * (i + 1);
220    for (int j = 0; j < NUM_CHANNELS; ++j) {
221      // Use values both above and below initialized value.
222      EXPECT_EQ(kReferenceMin[i], WebRtcVad_FindMinimum(handle, value, j));
223      EXPECT_EQ(kReferenceMin[i + 16], WebRtcVad_FindMinimum(handle, 12000, j));
224    }
225    handle->frame_counter++;
226  }
227
228  free(handle);
229}
230
231// TODO(bjornv): Add a process test, run on file.
232
233}  // namespace
234}  // namespace webrtc
235