/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_

#include <assert.h>
#include <string.h>  // memset, size_t

#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/typedefs.h"

namespace webrtc {

// Forward declarations.
class BackgroundNoise;

26d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org// This is the base class for Accelerate and PreemptiveExpand. This class
27d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org// cannot be instantiated, but must be used through either of the derived
28d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org// classes.
29d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.orgclass TimeStretch {
30d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org public:
31d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  enum ReturnCodes {
32d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org    kSuccess = 0,
33d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org    kSuccessLowEnergy = 1,
34d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org    kNoStretch = 2,
35d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org    kError = -1
36d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  };
37d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
38d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  TimeStretch(int sample_rate_hz, size_t num_channels,
39d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org              const BackgroundNoise& background_noise)
40d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org      : sample_rate_hz_(sample_rate_hz),
41d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org        fs_mult_(sample_rate_hz / 8000),
42dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting        num_channels_(num_channels),
43d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org        master_channel_(0),  // First channel is master.
44d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org        background_noise_(background_noise),
45d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org        max_input_value_(0) {
46d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org    assert(sample_rate_hz_ == 8000 ||
47d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org           sample_rate_hz_ == 16000 ||
48d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org           sample_rate_hz_ == 32000 ||
49d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org           sample_rate_hz_ == 48000);
50d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org    assert(num_channels_ > 0);
51dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting    assert(master_channel_ < num_channels_);
52d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org    memset(auto_correlation_, 0, sizeof(auto_correlation_));
53d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  }
54d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
55d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  virtual ~TimeStretch() {}
56d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
57d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // This method performs the processing common to both Accelerate and
58d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // PreemptiveExpand.
59d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  ReturnCodes Process(const int16_t* input,
60d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org                      size_t input_len,
61cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin                      bool fast_mode,
62fd11bbfb56b42f82e18a744a414325db7a56013fhenrik.lundin@webrtc.org                      AudioMultiVector* output,
63dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting                      size_t* length_change_samples);
64d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
65d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org protected:
66d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // Sets the parameters |best_correlation| and |peak_index| to suitable
67d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // values when the signal contains no active speech. This method must be
68d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // implemented by the sub-classes.
69362a55e7b0852a7be95f0d627321503258152551turaj@webrtc.org  virtual void SetParametersForPassiveSpeech(size_t input_length,
70d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org                                             int16_t* best_correlation,
71dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting                                             size_t* peak_index) const = 0;
72d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
73d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // Checks the criteria for performing the time-stretching operation and,
74d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // if possible, performs the time-stretching. This method must be implemented
75d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // by the sub-classes.
76d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  virtual ReturnCodes CheckCriteriaAndStretch(
77cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin      const int16_t* input,
78cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin      size_t input_length,
79cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin      size_t peak_index,
80cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin      int16_t best_correlation,
81cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin      bool active_speech,
82cf808d2366e58b33540931d182f36800d9a15b0dHenrik Lundin      bool fast_mode,
83fd11bbfb56b42f82e18a744a414325db7a56013fhenrik.lundin@webrtc.org      AudioMultiVector* output) const = 0;
84d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
85dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting  static const size_t kCorrelationLen = 50;
86dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting  static const size_t kLogCorrelationLen = 6;  // >= log2(kCorrelationLen).
87dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting  static const size_t kMinLag = 10;
88dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting  static const size_t kMaxLag = 60;
89dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting  static const size_t kDownsampledLen = kCorrelationLen + kMaxLag;
90d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  static const int kCorrelationThreshold = 14746;  // 0.9 in Q14.
91d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
92d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  const int sample_rate_hz_;
93d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  const int fs_mult_;  // Sample rate multiplier = sample_rate_hz_ / 8000.
94dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting  const size_t num_channels_;
95d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  const size_t master_channel_;
96d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  const BackgroundNoise& background_noise_;
97d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  int16_t max_input_value_;
98d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  int16_t downsampled_input_[kDownsampledLen];
99d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // Adding 1 to the size of |auto_correlation_| because of how it is used
100d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // by the peak-detection algorithm.
101d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  int16_t auto_correlation_[kCorrelationLen + 1];
102d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
103d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org private:
104d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // Calculates the auto-correlation of |downsampled_input_| and writes the
105d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // result to |auto_correlation_|.
106d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  void AutoCorrelation();
107d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
108d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  // Performs a simple voice-activity detection based on the input parameters.
109d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org  bool SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,
110dce40cf804019a9898b6ab8d8262466b697c56e0Peter Kasting                       size_t peak_index, int scaling) const;
111d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org
1123c089d751ede283e21e186885eaf705c3257ccd2henrikg  RTC_DISALLOW_COPY_AND_ASSIGN(TimeStretch);
113d94659dc279b86376c1a6470dc326fd342caaa93henrik.lundin@webrtc.org};

}  // namespace webrtc
#endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_