/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
12#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
13
14#include "webrtc/base/scoped_ptr.h"
15#include "webrtc/modules/audio_processing/vad/common.h"
16#include "webrtc/typedefs.h"
17
18namespace webrtc {
19
20class AudioFrame;
21class PoleZeroFilter;
22
23class VadAudioProc {
24 public:
25  // Forward declare iSAC structs.
26  struct PitchAnalysisStruct;
27  struct PreFiltBankstr;
28
29  VadAudioProc();
30  ~VadAudioProc();
31
32  int ExtractFeatures(const int16_t* audio_frame,
33                      size_t length,
34                      AudioFeatures* audio_features);
35
36  static const size_t kDftSize = 512;
37
38 private:
39  void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
40  void SubframeCorrelation(double* corr,
41                           size_t length_corr,
42                           size_t subframe_index);
43  void GetLpcPolynomials(double* lpc, size_t length_lpc);
44  void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
45  void Rms(double* rms, size_t length_rms);
46  void ResetBuffer();
47
48  // To compute spectral peak we perform LPC analysis to get spectral envelope.
49  // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis.
50  // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
51  // we need 5 ms of past signal to create the input of LPC analysis.
52  static const size_t kNumPastSignalSamples =
53      static_cast<size_t>(kSampleRateHz / 200);
54
55  // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
56  // all the code recognize it as "no-error."
57  static const int kNoError = 0;
58
59  static const size_t kNum10msSubframes = 3;
60  static const size_t kNumSubframeSamples =
61      static_cast<size_t>(kSampleRateHz / 100);
62  static const size_t kNumSamplesToProcess =
63      kNum10msSubframes *
64      kNumSubframeSamples;  // Samples in 30 ms @ given sampling rate.
65  static const size_t kBufferLength =
66      kNumPastSignalSamples + kNumSamplesToProcess;
67  static const size_t kIpLength = kDftSize >> 1;
68  static const size_t kWLength = kDftSize >> 1;
69
70  static const size_t kLpcOrder = 16;
71
72  size_t ip_[kIpLength];
73  float w_fft_[kWLength];
74
75  // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ).
76  float audio_buffer_[kBufferLength];
77  size_t num_buffer_samples_;
78
79  double log_old_gain_;
80  double old_lag_;
81
82  rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
83  rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_;
84  rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_;
85};
86
87}  // namespace webrtc
88
89#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
90