audio_buffer.cc revision 64e753c3998a17429418180b3a947231a9fd98cd
1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/audio_buffer.h"
12
13#include "webrtc/common_audio/include/audio_util.h"
14#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16#include "webrtc/common_audio/channel_buffer.h"
17#include "webrtc/modules/audio_processing/common.h"
18
19namespace webrtc {
20namespace {
21
// Per-channel frame lengths, in samples, that select the band layout below.
// 160 samples at 16 kHz is 10 ms of audio; the other two scale accordingly.
constexpr int kSamplesPer16kHzChannel = 160;
constexpr int kSamplesPer32kHzChannel = 320;
constexpr int kSamplesPer48kHzChannel = 480;
25
26bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
27  switch (layout) {
28    case AudioProcessing::kMono:
29    case AudioProcessing::kStereo:
30      return false;
31    case AudioProcessing::kMonoAndKeyboard:
32    case AudioProcessing::kStereoAndKeyboard:
33      return true;
34  }
35  assert(false);
36  return false;
37}
38
39int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
40  switch (layout) {
41    case AudioProcessing::kMono:
42    case AudioProcessing::kStereo:
43      assert(false);
44      return -1;
45    case AudioProcessing::kMonoAndKeyboard:
46      return 1;
47    case AudioProcessing::kStereoAndKeyboard:
48      return 2;
49  }
50  assert(false);
51  return -1;
52}
53
// Averages |left| and |right| sample by sample into |out|. All three arrays
// must hold at least |num_frames| elements; nothing is written when
// |num_frames| is not positive. The sum is divided by the integer 2, so for
// integer sample types the result truncates toward zero.
template <typename T>
void StereoToMono(const T* left, const T* right, T* out,
                  int num_frames) {
  for (int remaining = num_frames; remaining > 0; --remaining) {
    *out = (*left + *right) / 2;
    ++left;
    ++right;
    ++out;
  }
}
60
61int NumBandsFromSamplesPerChannel(int num_frames) {
62  int num_bands = 1;
63  if (num_frames == kSamplesPer32kHzChannel ||
64      num_frames == kSamplesPer48kHzChannel) {
65    num_bands = rtc::CheckedDivExact(num_frames,
66                                     static_cast<int>(kSamplesPer16kHzChannel));
67  }
68  return num_bands;
69}
70
71}  // namespace
72
73AudioBuffer::AudioBuffer(int input_num_frames,
74                         int num_input_channels,
75                         int process_num_frames,
76                         int num_process_channels,
77                         int output_num_frames)
78  : input_num_frames_(input_num_frames),
79    num_input_channels_(num_input_channels),
80    proc_num_frames_(process_num_frames),
81    num_proc_channels_(num_process_channels),
82    output_num_frames_(output_num_frames),
83    num_channels_(num_process_channels),
84    num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
85    num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
86    mixed_low_pass_valid_(false),
87    reference_copied_(false),
88    activity_(AudioFrame::kVadUnknown),
89    keyboard_data_(NULL),
90    data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
91  assert(input_num_frames_ > 0);
92  assert(proc_num_frames_ > 0);
93  assert(output_num_frames_ > 0);
94  assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
95  assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
96
97  if (input_num_frames_ != proc_num_frames_ ||
98      output_num_frames_ != proc_num_frames_) {
99    // Create an intermediate buffer for resampling.
100    process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
101                                                   num_proc_channels_));
102
103    if (input_num_frames_ != proc_num_frames_) {
104      for (int i = 0; i < num_proc_channels_; ++i) {
105        input_resamplers_.push_back(
106            new PushSincResampler(input_num_frames_,
107                                  proc_num_frames_));
108      }
109    }
110
111    if (output_num_frames_ != proc_num_frames_) {
112      for (int i = 0; i < num_proc_channels_; ++i) {
113        output_resamplers_.push_back(
114            new PushSincResampler(proc_num_frames_,
115                                  output_num_frames_));
116      }
117    }
118  }
119
120  if (num_bands_ > 1) {
121    split_data_.reset(new IFChannelBuffer(proc_num_frames_,
122                                          num_proc_channels_,
123                                          num_bands_));
124    splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
125                                                num_bands_,
126                                                proc_num_frames_));
127  }
128}
129
130AudioBuffer::~AudioBuffer() {}
131
132void AudioBuffer::CopyFrom(const float* const* data,
133                           int num_frames,
134                           AudioProcessing::ChannelLayout layout) {
135  assert(num_frames == input_num_frames_);
136  assert(ChannelsFromLayout(layout) == num_input_channels_);
137  InitForNewData();
138  // Initialized lazily because there's a different condition in
139  // DeinterleaveFrom.
140  if ((num_input_channels_ == 2 && num_proc_channels_ == 1) && !input_buffer_) {
141    input_buffer_.reset(
142        new IFChannelBuffer(input_num_frames_, num_proc_channels_));
143  }
144
145  if (HasKeyboardChannel(layout)) {
146    keyboard_data_ = data[KeyboardChannelIndex(layout)];
147  }
148
149  // Downmix.
150  const float* const* data_ptr = data;
151  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
152    StereoToMono(data[0],
153                 data[1],
154                 input_buffer_->fbuf()->channels()[0],
155                 input_num_frames_);
156    data_ptr = input_buffer_->fbuf_const()->channels();
157  }
158
159  // Resample.
160  if (input_num_frames_ != proc_num_frames_) {
161    for (int i = 0; i < num_proc_channels_; ++i) {
162      input_resamplers_[i]->Resample(data_ptr[i],
163                                     input_num_frames_,
164                                     process_buffer_->channels()[i],
165                                     proc_num_frames_);
166    }
167    data_ptr = process_buffer_->channels();
168  }
169
170  // Convert to the S16 range.
171  for (int i = 0; i < num_proc_channels_; ++i) {
172    FloatToFloatS16(data_ptr[i],
173                    proc_num_frames_,
174                    data_->fbuf()->channels()[i]);
175  }
176}
177
178void AudioBuffer::CopyTo(int num_frames,
179                         AudioProcessing::ChannelLayout layout,
180                         float* const* data) {
181  assert(num_frames == output_num_frames_);
182  assert(ChannelsFromLayout(layout) == num_channels_);
183
184  // Convert to the float range.
185  float* const* data_ptr = data;
186  if (output_num_frames_ != proc_num_frames_) {
187    // Convert to an intermediate buffer for subsequent resampling.
188    data_ptr = process_buffer_->channels();
189  }
190  for (int i = 0; i < num_channels_; ++i) {
191    FloatS16ToFloat(data_->fbuf()->channels()[i],
192                    proc_num_frames_,
193                    data_ptr[i]);
194  }
195
196  // Resample.
197  if (output_num_frames_ != proc_num_frames_) {
198    for (int i = 0; i < num_channels_; ++i) {
199      output_resamplers_[i]->Resample(data_ptr[i],
200                                      proc_num_frames_,
201                                      data[i],
202                                      output_num_frames_);
203    }
204  }
205}
206
207void AudioBuffer::InitForNewData() {
208  keyboard_data_ = NULL;
209  mixed_low_pass_valid_ = false;
210  reference_copied_ = false;
211  activity_ = AudioFrame::kVadUnknown;
212  num_channels_ = num_proc_channels_;
213}
214
215const int16_t* const* AudioBuffer::channels_const() const {
216  return data_->ibuf_const()->channels();
217}
218
219int16_t* const* AudioBuffer::channels() {
220  mixed_low_pass_valid_ = false;
221  return data_->ibuf()->channels();
222}
223
224const int16_t* const* AudioBuffer::split_bands_const(int channel) const {
225  return split_data_.get() ?
226         split_data_->ibuf_const()->bands(channel) :
227         data_->ibuf_const()->bands(channel);
228}
229
230int16_t* const* AudioBuffer::split_bands(int channel) {
231  mixed_low_pass_valid_ = false;
232  return split_data_.get() ?
233         split_data_->ibuf()->bands(channel) :
234         data_->ibuf()->bands(channel);
235}
236
237const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
238  if (split_data_.get()) {
239    return split_data_->ibuf_const()->channels(band);
240  } else {
241    return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
242  }
243}
244
245int16_t* const* AudioBuffer::split_channels(Band band) {
246  mixed_low_pass_valid_ = false;
247  if (split_data_.get()) {
248    return split_data_->ibuf()->channels(band);
249  } else {
250    return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
251  }
252}
253
254ChannelBuffer<int16_t>* AudioBuffer::data() {
255  mixed_low_pass_valid_ = false;
256  return data_->ibuf();
257}
258
259const ChannelBuffer<int16_t>* AudioBuffer::data() const {
260  return data_->ibuf_const();
261}
262
263ChannelBuffer<int16_t>* AudioBuffer::split_data() {
264  mixed_low_pass_valid_ = false;
265  return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
266}
267
268const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
269  return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
270}
271
272const float* const* AudioBuffer::channels_const_f() const {
273  return data_->fbuf_const()->channels();
274}
275
276float* const* AudioBuffer::channels_f() {
277  mixed_low_pass_valid_ = false;
278  return data_->fbuf()->channels();
279}
280
281const float* const* AudioBuffer::split_bands_const_f(int channel) const {
282  return split_data_.get() ?
283         split_data_->fbuf_const()->bands(channel) :
284         data_->fbuf_const()->bands(channel);
285}
286
287float* const* AudioBuffer::split_bands_f(int channel) {
288  mixed_low_pass_valid_ = false;
289  return split_data_.get() ?
290         split_data_->fbuf()->bands(channel) :
291         data_->fbuf()->bands(channel);
292}
293
294const float* const* AudioBuffer::split_channels_const_f(Band band) const {
295  if (split_data_.get()) {
296    return split_data_->fbuf_const()->channels(band);
297  } else {
298    return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
299  }
300}
301
302float* const* AudioBuffer::split_channels_f(Band band) {
303  mixed_low_pass_valid_ = false;
304  if (split_data_.get()) {
305    return split_data_->fbuf()->channels(band);
306  } else {
307    return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
308  }
309}
310
311ChannelBuffer<float>* AudioBuffer::data_f() {
312  mixed_low_pass_valid_ = false;
313  return data_->fbuf();
314}
315
316const ChannelBuffer<float>* AudioBuffer::data_f() const {
317  return data_->fbuf_const();
318}
319
320ChannelBuffer<float>* AudioBuffer::split_data_f() {
321  mixed_low_pass_valid_ = false;
322  return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
323}
324
325const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
326  return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
327}
328
329const int16_t* AudioBuffer::mixed_low_pass_data() {
330  // Currently only mixing stereo to mono is supported.
331  assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
332
333  if (num_proc_channels_ == 1) {
334    return split_bands_const(0)[kBand0To8kHz];
335  }
336
337  if (!mixed_low_pass_valid_) {
338    if (!mixed_low_pass_channels_.get()) {
339      mixed_low_pass_channels_.reset(
340          new ChannelBuffer<int16_t>(num_split_frames_, 1));
341    }
342    StereoToMono(split_bands_const(0)[kBand0To8kHz],
343                 split_bands_const(1)[kBand0To8kHz],
344                 mixed_low_pass_channels_->channels()[0],
345                 num_split_frames_);
346    mixed_low_pass_valid_ = true;
347  }
348  return mixed_low_pass_channels_->channels()[0];
349}
350
351const int16_t* AudioBuffer::low_pass_reference(int channel) const {
352  if (!reference_copied_) {
353    return NULL;
354  }
355
356  return low_pass_reference_channels_->channels()[channel];
357}
358
359const float* AudioBuffer::keyboard_data() const {
360  return keyboard_data_;
361}
362
363void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
364  activity_ = activity;
365}
366
367AudioFrame::VADActivity AudioBuffer::activity() const {
368  return activity_;
369}
370
371int AudioBuffer::num_channels() const {
372  return num_channels_;
373}
374
375void AudioBuffer::set_num_channels(int num_channels) {
376  num_channels_ = num_channels;
377}
378
379int AudioBuffer::num_frames() const {
380  return proc_num_frames_;
381}
382
383int AudioBuffer::num_frames_per_band() const {
384  return num_split_frames_;
385}
386
387int AudioBuffer::num_keyboard_frames() const {
388  // We don't resample the keyboard channel.
389  return input_num_frames_;
390}
391
392int AudioBuffer::num_bands() const {
393  return num_bands_;
394}
395
396// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
397void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
398  assert(frame->num_channels_ == num_input_channels_);
399  assert(frame->samples_per_channel_ == input_num_frames_);
400  InitForNewData();
401  // Initialized lazily because there's a different condition in CopyFrom.
402  if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
403    input_buffer_.reset(
404        new IFChannelBuffer(input_num_frames_, num_proc_channels_));
405  }
406  activity_ = frame->vad_activity_;
407
408  int16_t* const* deinterleaved;
409  if (input_num_frames_ == proc_num_frames_) {
410    deinterleaved = data_->ibuf()->channels();
411  } else {
412    deinterleaved = input_buffer_->ibuf()->channels();
413  }
414  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
415    // Downmix directly; no explicit deinterleaving needed.
416    for (int i = 0; i < input_num_frames_; ++i) {
417      deinterleaved[0][i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
418    }
419  } else {
420    assert(num_proc_channels_ == num_input_channels_);
421    Deinterleave(frame->data_,
422                 input_num_frames_,
423                 num_proc_channels_,
424                 deinterleaved);
425  }
426
427  // Resample.
428  if (input_num_frames_ != proc_num_frames_) {
429    for (int i = 0; i < num_proc_channels_; ++i) {
430      input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
431                                     input_num_frames_,
432                                     data_->fbuf()->channels()[i],
433                                     proc_num_frames_);
434    }
435  }
436}
437
438void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
439  assert(proc_num_frames_ == output_num_frames_);
440  assert(num_channels_ == num_input_channels_);
441  assert(frame->num_channels_ == num_channels_);
442  assert(frame->samples_per_channel_ == proc_num_frames_);
443  frame->vad_activity_ = activity_;
444
445  if (!data_changed) {
446    return;
447  }
448
449  Interleave(data_->ibuf()->channels(),
450             proc_num_frames_,
451             num_channels_,
452             frame->data_);
453}
454
455void AudioBuffer::CopyLowPassToReference() {
456  reference_copied_ = true;
457  if (!low_pass_reference_channels_.get() ||
458      low_pass_reference_channels_->num_channels() != num_channels_) {
459    low_pass_reference_channels_.reset(
460        new ChannelBuffer<int16_t>(num_split_frames_,
461                                   num_proc_channels_));
462  }
463  for (int i = 0; i < num_proc_channels_; i++) {
464    memcpy(low_pass_reference_channels_->channels()[i],
465           split_bands_const(i)[kBand0To8kHz],
466           low_pass_reference_channels_->num_frames_per_band() *
467               sizeof(split_bands_const(i)[kBand0To8kHz][0]));
468  }
469}
470
471void AudioBuffer::SplitIntoFrequencyBands() {
472  splitting_filter_->Analysis(data_.get(), split_data_.get());
473}
474
475void AudioBuffer::MergeFrequencyBands() {
476  splitting_filter_->Synthesis(split_data_.get(), data_.get());
477}
478
479}  // namespace webrtc
480