1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/audio_buffer.h"
12
13#include "webrtc/common_audio/include/audio_util.h"
14#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16#include "webrtc/common_audio/channel_buffer.h"
17#include "webrtc/modules/audio_processing/common.h"
18
19namespace webrtc {
20namespace {
21
// Per-channel sample counts of one frame at 16, 32 and 48 kHz. The wider
// rates are expressed as multiples of the 16 kHz frame, which is also the
// per-band frame size used when computing the number of bands.
const size_t kSamplesPer16kHzChannel = 160;
const size_t kSamplesPer32kHzChannel = 2 * kSamplesPer16kHzChannel;
const size_t kSamplesPer48kHzChannel = 3 * kSamplesPer16kHzChannel;
25
26int KeyboardChannelIndex(const StreamConfig& stream_config) {
27  if (!stream_config.has_keyboard()) {
28    assert(false);
29    return 0;
30  }
31
32  return stream_config.num_channels();
33}
34
35size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
36  size_t num_bands = 1;
37  if (num_frames == kSamplesPer32kHzChannel ||
38      num_frames == kSamplesPer48kHzChannel) {
39    num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
40  }
41  return num_bands;
42}
43
44}  // namespace
45
46AudioBuffer::AudioBuffer(size_t input_num_frames,
47                         size_t num_input_channels,
48                         size_t process_num_frames,
49                         size_t num_process_channels,
50                         size_t output_num_frames)
51  : input_num_frames_(input_num_frames),
52    num_input_channels_(num_input_channels),
53    proc_num_frames_(process_num_frames),
54    num_proc_channels_(num_process_channels),
55    output_num_frames_(output_num_frames),
56    num_channels_(num_process_channels),
57    num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
58    num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
59    mixed_low_pass_valid_(false),
60    reference_copied_(false),
61    activity_(AudioFrame::kVadUnknown),
62    keyboard_data_(NULL),
63    data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
64  assert(input_num_frames_ > 0);
65  assert(proc_num_frames_ > 0);
66  assert(output_num_frames_ > 0);
67  assert(num_input_channels_ > 0);
68  assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
69
70  if (input_num_frames_ != proc_num_frames_ ||
71      output_num_frames_ != proc_num_frames_) {
72    // Create an intermediate buffer for resampling.
73    process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
74                                                   num_proc_channels_));
75
76    if (input_num_frames_ != proc_num_frames_) {
77      for (size_t i = 0; i < num_proc_channels_; ++i) {
78        input_resamplers_.push_back(
79            new PushSincResampler(input_num_frames_,
80                                  proc_num_frames_));
81      }
82    }
83
84    if (output_num_frames_ != proc_num_frames_) {
85      for (size_t i = 0; i < num_proc_channels_; ++i) {
86        output_resamplers_.push_back(
87            new PushSincResampler(proc_num_frames_,
88                                  output_num_frames_));
89      }
90    }
91  }
92
93  if (num_bands_ > 1) {
94    split_data_.reset(new IFChannelBuffer(proc_num_frames_,
95                                          num_proc_channels_,
96                                          num_bands_));
97    splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
98                                                num_bands_,
99                                                proc_num_frames_));
100  }
101}
102
103AudioBuffer::~AudioBuffer() {}
104
105void AudioBuffer::CopyFrom(const float* const* data,
106                           const StreamConfig& stream_config) {
107  assert(stream_config.num_frames() == input_num_frames_);
108  assert(stream_config.num_channels() == num_input_channels_);
109  InitForNewData();
110  // Initialized lazily because there's a different condition in
111  // DeinterleaveFrom.
112  const bool need_to_downmix =
113      num_input_channels_ > 1 && num_proc_channels_ == 1;
114  if (need_to_downmix && !input_buffer_) {
115    input_buffer_.reset(
116        new IFChannelBuffer(input_num_frames_, num_proc_channels_));
117  }
118
119  if (stream_config.has_keyboard()) {
120    keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
121  }
122
123  // Downmix.
124  const float* const* data_ptr = data;
125  if (need_to_downmix) {
126    DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
127                                input_buffer_->fbuf()->channels()[0]);
128    data_ptr = input_buffer_->fbuf_const()->channels();
129  }
130
131  // Resample.
132  if (input_num_frames_ != proc_num_frames_) {
133    for (size_t i = 0; i < num_proc_channels_; ++i) {
134      input_resamplers_[i]->Resample(data_ptr[i],
135                                     input_num_frames_,
136                                     process_buffer_->channels()[i],
137                                     proc_num_frames_);
138    }
139    data_ptr = process_buffer_->channels();
140  }
141
142  // Convert to the S16 range.
143  for (size_t i = 0; i < num_proc_channels_; ++i) {
144    FloatToFloatS16(data_ptr[i],
145                    proc_num_frames_,
146                    data_->fbuf()->channels()[i]);
147  }
148}
149
150void AudioBuffer::CopyTo(const StreamConfig& stream_config,
151                         float* const* data) {
152  assert(stream_config.num_frames() == output_num_frames_);
153  assert(stream_config.num_channels() == num_channels_ || num_channels_ == 1);
154
155  // Convert to the float range.
156  float* const* data_ptr = data;
157  if (output_num_frames_ != proc_num_frames_) {
158    // Convert to an intermediate buffer for subsequent resampling.
159    data_ptr = process_buffer_->channels();
160  }
161  for (size_t i = 0; i < num_channels_; ++i) {
162    FloatS16ToFloat(data_->fbuf()->channels()[i],
163                    proc_num_frames_,
164                    data_ptr[i]);
165  }
166
167  // Resample.
168  if (output_num_frames_ != proc_num_frames_) {
169    for (size_t i = 0; i < num_channels_; ++i) {
170      output_resamplers_[i]->Resample(data_ptr[i],
171                                      proc_num_frames_,
172                                      data[i],
173                                      output_num_frames_);
174    }
175  }
176
177  // Upmix.
178  for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) {
179    memcpy(data[i], data[0], output_num_frames_ * sizeof(**data));
180  }
181}
182
183void AudioBuffer::InitForNewData() {
184  keyboard_data_ = NULL;
185  mixed_low_pass_valid_ = false;
186  reference_copied_ = false;
187  activity_ = AudioFrame::kVadUnknown;
188  num_channels_ = num_proc_channels_;
189}
190
191const int16_t* const* AudioBuffer::channels_const() const {
192  return data_->ibuf_const()->channels();
193}
194
195int16_t* const* AudioBuffer::channels() {
196  mixed_low_pass_valid_ = false;
197  return data_->ibuf()->channels();
198}
199
200const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
201  return split_data_.get() ?
202         split_data_->ibuf_const()->bands(channel) :
203         data_->ibuf_const()->bands(channel);
204}
205
206int16_t* const* AudioBuffer::split_bands(size_t channel) {
207  mixed_low_pass_valid_ = false;
208  return split_data_.get() ?
209         split_data_->ibuf()->bands(channel) :
210         data_->ibuf()->bands(channel);
211}
212
213const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
214  if (split_data_.get()) {
215    return split_data_->ibuf_const()->channels(band);
216  } else {
217    return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
218  }
219}
220
221int16_t* const* AudioBuffer::split_channels(Band band) {
222  mixed_low_pass_valid_ = false;
223  if (split_data_.get()) {
224    return split_data_->ibuf()->channels(band);
225  } else {
226    return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
227  }
228}
229
230ChannelBuffer<int16_t>* AudioBuffer::data() {
231  mixed_low_pass_valid_ = false;
232  return data_->ibuf();
233}
234
235const ChannelBuffer<int16_t>* AudioBuffer::data() const {
236  return data_->ibuf_const();
237}
238
239ChannelBuffer<int16_t>* AudioBuffer::split_data() {
240  mixed_low_pass_valid_ = false;
241  return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
242}
243
244const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
245  return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
246}
247
248const float* const* AudioBuffer::channels_const_f() const {
249  return data_->fbuf_const()->channels();
250}
251
252float* const* AudioBuffer::channels_f() {
253  mixed_low_pass_valid_ = false;
254  return data_->fbuf()->channels();
255}
256
257const float* const* AudioBuffer::split_bands_const_f(size_t channel) const {
258  return split_data_.get() ?
259         split_data_->fbuf_const()->bands(channel) :
260         data_->fbuf_const()->bands(channel);
261}
262
263float* const* AudioBuffer::split_bands_f(size_t channel) {
264  mixed_low_pass_valid_ = false;
265  return split_data_.get() ?
266         split_data_->fbuf()->bands(channel) :
267         data_->fbuf()->bands(channel);
268}
269
270const float* const* AudioBuffer::split_channels_const_f(Band band) const {
271  if (split_data_.get()) {
272    return split_data_->fbuf_const()->channels(band);
273  } else {
274    return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
275  }
276}
277
278float* const* AudioBuffer::split_channels_f(Band band) {
279  mixed_low_pass_valid_ = false;
280  if (split_data_.get()) {
281    return split_data_->fbuf()->channels(band);
282  } else {
283    return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
284  }
285}
286
287ChannelBuffer<float>* AudioBuffer::data_f() {
288  mixed_low_pass_valid_ = false;
289  return data_->fbuf();
290}
291
292const ChannelBuffer<float>* AudioBuffer::data_f() const {
293  return data_->fbuf_const();
294}
295
296ChannelBuffer<float>* AudioBuffer::split_data_f() {
297  mixed_low_pass_valid_ = false;
298  return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
299}
300
301const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
302  return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
303}
304
305const int16_t* AudioBuffer::mixed_low_pass_data() {
306  if (num_proc_channels_ == 1) {
307    return split_bands_const(0)[kBand0To8kHz];
308  }
309
310  if (!mixed_low_pass_valid_) {
311    if (!mixed_low_pass_channels_.get()) {
312      mixed_low_pass_channels_.reset(
313          new ChannelBuffer<int16_t>(num_split_frames_, 1));
314    }
315
316    DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
317                                    num_split_frames_, num_channels_,
318                                    mixed_low_pass_channels_->channels()[0]);
319    mixed_low_pass_valid_ = true;
320  }
321  return mixed_low_pass_channels_->channels()[0];
322}
323
324const int16_t* AudioBuffer::low_pass_reference(int channel) const {
325  if (!reference_copied_) {
326    return NULL;
327  }
328
329  return low_pass_reference_channels_->channels()[channel];
330}
331
332const float* AudioBuffer::keyboard_data() const {
333  return keyboard_data_;
334}
335
336void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
337  activity_ = activity;
338}
339
340AudioFrame::VADActivity AudioBuffer::activity() const {
341  return activity_;
342}
343
344size_t AudioBuffer::num_channels() const {
345  return num_channels_;
346}
347
348void AudioBuffer::set_num_channels(size_t num_channels) {
349  num_channels_ = num_channels;
350}
351
352size_t AudioBuffer::num_frames() const {
353  return proc_num_frames_;
354}
355
356size_t AudioBuffer::num_frames_per_band() const {
357  return num_split_frames_;
358}
359
360size_t AudioBuffer::num_keyboard_frames() const {
361  // We don't resample the keyboard channel.
362  return input_num_frames_;
363}
364
365size_t AudioBuffer::num_bands() const {
366  return num_bands_;
367}
368
369// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
370void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
371  assert(frame->num_channels_ == num_input_channels_);
372  assert(frame->samples_per_channel_ == input_num_frames_);
373  InitForNewData();
374  // Initialized lazily because there's a different condition in CopyFrom.
375  if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
376    input_buffer_.reset(
377        new IFChannelBuffer(input_num_frames_, num_proc_channels_));
378  }
379  activity_ = frame->vad_activity_;
380
381  int16_t* const* deinterleaved;
382  if (input_num_frames_ == proc_num_frames_) {
383    deinterleaved = data_->ibuf()->channels();
384  } else {
385    deinterleaved = input_buffer_->ibuf()->channels();
386  }
387  if (num_proc_channels_ == 1) {
388    // Downmix and deinterleave simultaneously.
389    DownmixInterleavedToMono(frame->data_, input_num_frames_,
390                             num_input_channels_, deinterleaved[0]);
391  } else {
392    assert(num_proc_channels_ == num_input_channels_);
393    Deinterleave(frame->data_,
394                 input_num_frames_,
395                 num_proc_channels_,
396                 deinterleaved);
397  }
398
399  // Resample.
400  if (input_num_frames_ != proc_num_frames_) {
401    for (size_t i = 0; i < num_proc_channels_; ++i) {
402      input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
403                                     input_num_frames_,
404                                     data_->fbuf()->channels()[i],
405                                     proc_num_frames_);
406    }
407  }
408}
409
410void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
411  frame->vad_activity_ = activity_;
412  if (!data_changed) {
413    return;
414  }
415
416  assert(frame->num_channels_ == num_channels_ || num_channels_ == 1);
417  assert(frame->samples_per_channel_ == output_num_frames_);
418
419  // Resample if necessary.
420  IFChannelBuffer* data_ptr = data_.get();
421  if (proc_num_frames_ != output_num_frames_) {
422    if (!output_buffer_) {
423      output_buffer_.reset(
424          new IFChannelBuffer(output_num_frames_, num_channels_));
425    }
426    for (size_t i = 0; i < num_channels_; ++i) {
427      output_resamplers_[i]->Resample(
428          data_->fbuf()->channels()[i], proc_num_frames_,
429          output_buffer_->fbuf()->channels()[i], output_num_frames_);
430    }
431    data_ptr = output_buffer_.get();
432  }
433
434  if (frame->num_channels_ == num_channels_) {
435    Interleave(data_ptr->ibuf()->channels(), proc_num_frames_, num_channels_,
436               frame->data_);
437  } else {
438    UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], proc_num_frames_,
439                           frame->num_channels_, frame->data_);
440  }
441}
442
443void AudioBuffer::CopyLowPassToReference() {
444  reference_copied_ = true;
445  if (!low_pass_reference_channels_.get() ||
446      low_pass_reference_channels_->num_channels() != num_channels_) {
447    low_pass_reference_channels_.reset(
448        new ChannelBuffer<int16_t>(num_split_frames_,
449                                   num_proc_channels_));
450  }
451  for (size_t i = 0; i < num_proc_channels_; i++) {
452    memcpy(low_pass_reference_channels_->channels()[i],
453           split_bands_const(i)[kBand0To8kHz],
454           low_pass_reference_channels_->num_frames_per_band() *
455               sizeof(split_bands_const(i)[kBand0To8kHz][0]));
456  }
457}
458
459void AudioBuffer::SplitIntoFrequencyBands() {
460  splitting_filter_->Analysis(data_.get(), split_data_.get());
461}
462
463void AudioBuffer::MergeFrequencyBands() {
464  splitting_filter_->Synthesis(split_data_.get(), data_.get());
465}
466
467}  // namespace webrtc
468