1/*
2 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
12#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
13
14#include <deque>
15#include <set>
16
17#include "webrtc/base/scoped_ptr.h"
18#include "webrtc/test/testsupport/gtest_prod_util.h"
19#include "webrtc/typedefs.h"
20
21namespace webrtc {
22
23class TransientDetector;
24
25// Detects transients in an audio stream and suppresses them using a simple
26// restoration algorithm that attenuates unexpected spikes in the spectrum.
//
// Presumably Initialize() has to be called before Suppress(), since the
// Suppress() contract below refers to |data_length_| and |num_channels_| --
// confirm against the implementation file.
//
// NOTE(review): size_t is used throughout this class, but this header does not
// include <stddef.h> directly; it is presumably provided transitively by one
// of the includes above -- confirm.
27class TransientSuppressor {
28 public:
29  TransientSuppressor();
30  ~TransientSuppressor();
31
  // Configures the suppressor for the given |sample_rate_hz|,
  // |detector_rate_hz| and |num_channels|.
  // NOTE(review): the return convention is not visible in this header;
  // presumably 0 on success and -1 otherwise, as for Suppress() -- confirm.
32  int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
33
34  // Processes a |data| chunk, and returns it with keystrokes suppressed from
35  // it. The float format is assumed to be int16 ranged. If there is more than
36  // one channel, the chunks are concatenated one after the other in |data|.
37  // |data_length| must be equal to |data_length_|.
38  // |num_channels| must be equal to |num_channels_|.
39  // A sub-band, ideally the higher, can be used as |detection_data|. If it is
40  // NULL, |data| is used for the detection too. The |detection_data| is always
41  // assumed mono.
42  // If a reference signal (e.g. keyboard microphone) is available, it can be
43  // passed in as |reference_data|. It is assumed mono and must have the same
44  // length as |data|. NULL is accepted if unavailable.
  // |detection_length| and |reference_length| presumably give the number of
  // samples in |detection_data| and |reference_data| respectively -- confirm
  // against the implementation.
45  // This suppressor performs better if voice information is available.
46  // |voice_probability| is the probability of voice being present in this chunk
47  // of audio. If voice information is not available, |voice_probability| must
48  // always be set to 1.
49  // |key_pressed| determines if a key was pressed on this audio chunk.
50  // Returns 0 on success and -1 otherwise.
51  int Suppress(float* data,
52               size_t data_length,
53               int num_channels,
54               const float* detection_data,
55               size_t detection_length,
56               const float* reference_data,
57               size_t reference_length,
58               float voice_probability,
59               bool key_pressed);
60
61 private:
  // Gives the named unit test access to the private members of this class.
62  FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
63                           TypingDetectionLogicWorksAsExpectedForMono);
  // Private overload of Suppress() taking an input pointer, a spectral mean
  // array and an output pointer.
  // NOTE(review): presumably handles a single channel per call -- confirm.
64  void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
65
  // State update helpers driven by the |key_pressed| and |voice_probability|
  // inputs. Presumably they maintain the keypress and restoration members
  // declared below -- confirm against the implementation.
66  void UpdateKeypress(bool key_pressed);
67  void UpdateRestoration(float voice_probability);
68
  // Presumably feeds a new |data| chunk into the internal buffers -- confirm.
69  void UpdateBuffers(float* data);
70
  // The two restoration variants; see |use_hard_restoration_| below.
  // NOTE(review): which one is chosen, and when, is decided in the
  // implementation file.
71  void HardRestoration(float* spectral_mean);
72  void SoftRestoration(float* spectral_mean);
73
  // Owned transient detector (only forward-declared in this header).
74  rtc::scoped_ptr<TransientDetector> detector_;
75
  // Lengths and channel count. |data_length_| and |num_channels_| are the
  // values that the |data_length| and |num_channels| arguments of the public
  // Suppress() are required to match.
76  size_t data_length_;
77  size_t detection_length_;
78  size_t analysis_length_;
79  size_t buffer_delay_;
80  size_t complex_analysis_length_;
81  int num_channels_;
82  // Input buffer where the original samples are stored.
83  rtc::scoped_ptr<float[]> in_buffer_;
84  rtc::scoped_ptr<float[]> detection_buffer_;
85  // Output buffer where the restored samples are stored.
86  rtc::scoped_ptr<float[]> out_buffer_;
87
88  // Arrays for fft.
89  rtc::scoped_ptr<size_t[]> ip_;
90  rtc::scoped_ptr<float[]> wfft_;
91
92  rtc::scoped_ptr<float[]> spectral_mean_;
93
94  // Stores the data for the fft.
95  rtc::scoped_ptr<float[]> fft_buffer_;
96
97  rtc::scoped_ptr<float[]> magnitudes_;
98
  // Raw pointer rather than a scoped_ptr, so presumably it refers to a window
  // table owned elsewhere -- confirm.
99  const float* window_;
100
101  rtc::scoped_ptr<float[]> mean_factor_;
102
103  float detector_smoothed_;
104
  // Keypress tracking state and the resulting enable flags.
105  int keypress_counter_;
106  int chunks_since_keypress_;
107  bool detection_enabled_;
108  bool suppression_enabled_;
109
  // Restoration mode state.
110  bool use_hard_restoration_;
111  int chunks_since_voice_change_;
112
  // NOTE(review): presumably the seed for a pseudo-random generator used by
  // the restoration code -- confirm.
113  uint32_t seed_;
114
  // NOTE(review): presumably records whether a |reference_data| signal is in
  // use -- confirm.
115  bool using_reference_;
116};
117
118}  // namespace webrtc
119
120#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
121