/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_

#include <assert.h>
#include <string.h>  // memset, size_t

#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/typedefs.h"
20d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 21d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.orgnamespace webrtc { 22d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 23d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org// Forward declarations. 24d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.orgclass BackgroundNoise; 25d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 26d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org// This is the base class for Accelerate and PreemptiveExpand. This class 27d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org// cannot be instantiated, but must be used through either of the derived 28d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org// classes. 29d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.orgclass TimeStretch { 30d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org public: 31d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org enum ReturnCodes { 32d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org kSuccess = 0, 33d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org kSuccessLowEnergy = 1, 34d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org kNoStretch = 2, 35d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org kError = -1 36d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org }; 37d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 38d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org TimeStretch(int sample_rate_hz, size_t num_channels, 39d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org const BackgroundNoise& background_noise) 40d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org : sample_rate_hz_(sample_rate_hz), 41d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org fs_mult_(sample_rate_hz / 8000), 
42dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting num_channels_(num_channels), 43d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org master_channel_(0), // First channel is master. 44d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org background_noise_(background_noise), 45d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org max_input_value_(0) { 46d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org assert(sample_rate_hz_ == 8000 || 47d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org sample_rate_hz_ == 16000 || 48d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org sample_rate_hz_ == 32000 || 49d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org sample_rate_hz_ == 48000); 50d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org assert(num_channels_ > 0); 51dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting assert(master_channel_ < num_channels_); 52d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org memset(auto_correlation_, 0, sizeof(auto_correlation_)); 53d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org } 54d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 55d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org virtual ~TimeStretch() {} 56d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 57d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // This method performs the processing common to both Accelerate and 58d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // PreemptiveExpand. 
59d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org ReturnCodes Process(const int16_t* input, 60d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org size_t input_len, 61cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin bool fast_mode, 62fd11bbfb56b42f82e18a744a414325db7a56013fhenrik.lundin@webrtc.org AudioMultiVector* output, 63dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t* length_change_samples); 64d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 65d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org protected: 66d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // Sets the parameters |best_correlation| and |peak_index| to suitable 67d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // values when the signal contains no active speech. This method must be 68d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // implemented by the sub-classes. 69362a55e7b0852a7be95f0d627321503258152551turaj@webrtc.org virtual void SetParametersForPassiveSpeech(size_t input_length, 70d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org int16_t* best_correlation, 71dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t* peak_index) const = 0; 72d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 73d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // Checks the criteria for performing the time-stretching operation and, 74d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // if possible, performs the time-stretching. This method must be implemented 75d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // by the sub-classes. 
76d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org virtual ReturnCodes CheckCriteriaAndStretch( 77cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin const int16_t* input, 78cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin size_t input_length, 79cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin size_t peak_index, 80cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin int16_t best_correlation, 81cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin bool active_speech, 82cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin bool fast_mode, 83fd11bbfb56b42f82e18a744a414325db7a56013fhenrik.lundin@webrtc.org AudioMultiVector* output) const = 0; 84d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 85dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting static const size_t kCorrelationLen = 50; 86dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting static const size_t kLogCorrelationLen = 6; // >= log2(kCorrelationLen). 87dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting static const size_t kMinLag = 10; 88dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting static const size_t kMaxLag = 60; 89dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting static const size_t kDownsampledLen = kCorrelationLen + kMaxLag; 90d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org static const int kCorrelationThreshold = 14746; // 0.9 in Q14. 91d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 92d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org const int sample_rate_hz_; 93d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org const int fs_mult_; // Sample rate multiplier = sample_rate_hz_ / 8000. 
94dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting const size_t num_channels_; 95d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org const size_t master_channel_; 96d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org const BackgroundNoise& background_noise_; 97d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org int16_t max_input_value_; 98d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org int16_t downsampled_input_[kDownsampledLen]; 99d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // Adding 1 to the size of |auto_correlation_| because of how it is used 100d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // by the peak-detection algorithm. 101d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org int16_t auto_correlation_[kCorrelationLen + 1]; 102d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 103d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org private: 104d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // Calculates the auto-correlation of |downsampled_input_| and writes the 105d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // result to |auto_correlation_|. 106d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org void AutoCorrelation(); 107d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 108d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org // Performs a simple voice-activity detection based on the input parameters. 
109d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org bool SpeechDetection(int32_t vec1_energy, int32_t vec2_energy, 110dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting size_t peak_index, int scaling) const; 111d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 1123c089d751ede283e21e186885eaf705c3257ccd2henrikg RTC_DISALLOW_COPY_AND_ASSIGN(TimeStretch); 113d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org}; 114d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org 115d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org} // namespace webrtc 1169c55f0f957534144d2b8a64154f0a479249b34behenrik.lundin@webrtc.org#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_ 117