audio_buffer.cc revision 8328e7c44d59bb9fcbc7f8a033beb3d073929518
1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/audio_buffer.h"
12
13#include "webrtc/common_audio/include/audio_util.h"
14#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16
17namespace webrtc {
18namespace {
19
// Samples per channel in one chunk at each named rate. 80 samples at 8 kHz is
// 10 ms; the other two values scale with the rate.
enum {
  kSamplesPer8kHzChannel = 80,
  kSamplesPer16kHzChannel = 2 * 80,
  kSamplesPer32kHzChannel = 4 * 80
};
25
26bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
27  switch (layout) {
28    case AudioProcessing::kMono:
29    case AudioProcessing::kStereo:
30      return false;
31    case AudioProcessing::kMonoAndKeyboard:
32    case AudioProcessing::kStereoAndKeyboard:
33      return true;
34  }
35  assert(false);
36  return false;
37}
38
39int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
40  switch (layout) {
41    case AudioProcessing::kMono:
42    case AudioProcessing::kStereo:
43      assert(false);
44      return -1;
45    case AudioProcessing::kMonoAndKeyboard:
46      return 1;
47    case AudioProcessing::kStereoAndKeyboard:
48      return 2;
49  }
50  assert(false);
51  return -1;
52}
53
// Averages two channels sample by sample: each of the first
// |samples_per_channel| entries of |out| receives (left + right) / 2. The
// arithmetic follows the usual promotions for T, so integer sample types
// truncate toward zero. Nothing is written when the count is not positive.
template <typename T>
void StereoToMono(const T* left, const T* right, T* out,
                  int samples_per_channel) {
  for (int remaining = samples_per_channel; remaining > 0; --remaining) {
    const T l = *left++;
    const T r = *right++;
    *out++ = (l + r) / 2;
  }
}
60
61}  // namespace
62
63// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
64// broken when someone requests write access to either ChannelBuffer, and
65// reestablished when someone requests the outdated ChannelBuffer. It is
66// therefore safe to use the return value of ibuf_const() and fbuf_const()
67// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
68// fbuf() until the next call to any of the other functions.
69class IFChannelBuffer {
70 public:
71  IFChannelBuffer(int samples_per_channel, int num_channels)
72      : ivalid_(true),
73        ibuf_(samples_per_channel, num_channels),
74        fvalid_(true),
75        fbuf_(samples_per_channel, num_channels) {}
76
77  ChannelBuffer<int16_t>* ibuf() { return ibuf(false); }
78  ChannelBuffer<float>* fbuf() { return fbuf(false); }
79  const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); }
80  const ChannelBuffer<float>* fbuf_const() { return fbuf(true); }
81
82 private:
83  ChannelBuffer<int16_t>* ibuf(bool readonly) {
84    RefreshI();
85    fvalid_ = readonly;
86    return &ibuf_;
87  }
88
89  ChannelBuffer<float>* fbuf(bool readonly) {
90    RefreshF();
91    ivalid_ = readonly;
92    return &fbuf_;
93  }
94
95  void RefreshF() {
96    if (!fvalid_) {
97      assert(ivalid_);
98      const int16_t* const int_data = ibuf_.data();
99      float* const float_data = fbuf_.data();
100      const int length = fbuf_.length();
101      for (int i = 0; i < length; ++i)
102        float_data[i] = int_data[i];
103      fvalid_ = true;
104    }
105  }
106
107  void RefreshI() {
108    if (!ivalid_) {
109      assert(fvalid_);
110      FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
111      ivalid_ = true;
112    }
113  }
114
115  bool ivalid_;
116  ChannelBuffer<int16_t> ibuf_;
117  bool fvalid_;
118  ChannelBuffer<float> fbuf_;
119};
120
121AudioBuffer::AudioBuffer(int input_samples_per_channel,
122                         int num_input_channels,
123                         int process_samples_per_channel,
124                         int num_process_channels,
125                         int output_samples_per_channel)
126  : input_samples_per_channel_(input_samples_per_channel),
127    num_input_channels_(num_input_channels),
128    proc_samples_per_channel_(process_samples_per_channel),
129    num_proc_channels_(num_process_channels),
130    output_samples_per_channel_(output_samples_per_channel),
131    samples_per_split_channel_(proc_samples_per_channel_),
132    mixed_low_pass_valid_(false),
133    reference_copied_(false),
134    activity_(AudioFrame::kVadUnknown),
135    keyboard_data_(NULL),
136    channels_(new IFChannelBuffer(proc_samples_per_channel_,
137                                  num_proc_channels_)) {
138  assert(input_samples_per_channel_ > 0);
139  assert(proc_samples_per_channel_ > 0);
140  assert(output_samples_per_channel_ > 0);
141  assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
142  assert(num_proc_channels_ <= num_input_channels);
143
144  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
145    input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
146                                                 num_proc_channels_));
147  }
148
149  if (input_samples_per_channel_ != proc_samples_per_channel_ ||
150      output_samples_per_channel_ != proc_samples_per_channel_) {
151    // Create an intermediate buffer for resampling.
152    process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
153                                                   num_proc_channels_));
154  }
155
156  if (input_samples_per_channel_ != proc_samples_per_channel_) {
157    input_resamplers_.reserve(num_proc_channels_);
158    for (int i = 0; i < num_proc_channels_; ++i) {
159      input_resamplers_.push_back(
160          new PushSincResampler(input_samples_per_channel_,
161                                proc_samples_per_channel_));
162    }
163  }
164
165  if (output_samples_per_channel_ != proc_samples_per_channel_) {
166    output_resamplers_.reserve(num_proc_channels_);
167    for (int i = 0; i < num_proc_channels_; ++i) {
168      output_resamplers_.push_back(
169          new PushSincResampler(proc_samples_per_channel_,
170                                output_samples_per_channel_));
171    }
172  }
173
174  if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
175    samples_per_split_channel_ = kSamplesPer16kHzChannel;
176    split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_,
177                                                  num_proc_channels_));
178    split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_,
179                                                   num_proc_channels_));
180    filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
181  }
182}
183
184AudioBuffer::~AudioBuffer() {}
185
186void AudioBuffer::CopyFrom(const float* const* data,
187                           int samples_per_channel,
188                           AudioProcessing::ChannelLayout layout) {
189  assert(samples_per_channel == input_samples_per_channel_);
190  assert(ChannelsFromLayout(layout) == num_input_channels_);
191  InitForNewData();
192
193  if (HasKeyboardChannel(layout)) {
194    keyboard_data_ = data[KeyboardChannelIndex(layout)];
195  }
196
197  // Downmix.
198  const float* const* data_ptr = data;
199  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
200    StereoToMono(data[0],
201                 data[1],
202                 input_buffer_->channel(0),
203                 input_samples_per_channel_);
204    data_ptr = input_buffer_->channels();
205  }
206
207  // Resample.
208  if (input_samples_per_channel_ != proc_samples_per_channel_) {
209    for (int i = 0; i < num_proc_channels_; ++i) {
210      input_resamplers_[i]->Resample(data_ptr[i],
211                                     input_samples_per_channel_,
212                                     process_buffer_->channel(i),
213                                     proc_samples_per_channel_);
214    }
215    data_ptr = process_buffer_->channels();
216  }
217
218  // Convert to the S16 range.
219  for (int i = 0; i < num_proc_channels_; ++i) {
220    FloatToFloatS16(data_ptr[i], proc_samples_per_channel_,
221                    channels_->fbuf()->channel(i));
222  }
223}
224
225void AudioBuffer::CopyTo(int samples_per_channel,
226                         AudioProcessing::ChannelLayout layout,
227                         float* const* data) {
228  assert(samples_per_channel == output_samples_per_channel_);
229  assert(ChannelsFromLayout(layout) == num_proc_channels_);
230
231  // Convert to the float range.
232  float* const* data_ptr = data;
233  if (output_samples_per_channel_ != proc_samples_per_channel_) {
234    // Convert to an intermediate buffer for subsequent resampling.
235    data_ptr = process_buffer_->channels();
236  }
237  for (int i = 0; i < num_proc_channels_; ++i) {
238    FloatS16ToFloat(channels_->fbuf()->channel(i), proc_samples_per_channel_,
239                    data_ptr[i]);
240  }
241
242  // Resample.
243  if (output_samples_per_channel_ != proc_samples_per_channel_) {
244    for (int i = 0; i < num_proc_channels_; ++i) {
245      output_resamplers_[i]->Resample(data_ptr[i],
246                                      proc_samples_per_channel_,
247                                      data[i],
248                                      output_samples_per_channel_);
249    }
250  }
251}
252
253void AudioBuffer::InitForNewData() {
254  keyboard_data_ = NULL;
255  mixed_low_pass_valid_ = false;
256  reference_copied_ = false;
257  activity_ = AudioFrame::kVadUnknown;
258}
259
260const int16_t* AudioBuffer::data(int channel) const {
261  return channels_->ibuf_const()->channel(channel);
262}
263
264int16_t* AudioBuffer::data(int channel) {
265  mixed_low_pass_valid_ = false;
266  return channels_->ibuf()->channel(channel);
267}
268
269const float* AudioBuffer::data_f(int channel) const {
270  return channels_->fbuf_const()->channel(channel);
271}
272
273float* AudioBuffer::data_f(int channel) {
274  mixed_low_pass_valid_ = false;
275  return channels_->fbuf()->channel(channel);
276}
277
278const float* const* AudioBuffer::channels_f() const {
279  return channels_->fbuf_const()->channels();
280}
281
282float* const* AudioBuffer::channels_f() {
283  mixed_low_pass_valid_ = false;
284  return channels_->fbuf()->channels();
285}
286
287const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
288  return split_channels_low_.get()
289      ? split_channels_low_->ibuf_const()->channel(channel)
290      : data(channel);
291}
292
293int16_t* AudioBuffer::low_pass_split_data(int channel) {
294  mixed_low_pass_valid_ = false;
295  return split_channels_low_.get()
296      ? split_channels_low_->ibuf()->channel(channel)
297      : data(channel);
298}
299
300const float* AudioBuffer::low_pass_split_data_f(int channel) const {
301  return split_channels_low_.get()
302      ? split_channels_low_->fbuf_const()->channel(channel)
303      : data_f(channel);
304}
305
306float* AudioBuffer::low_pass_split_data_f(int channel) {
307  mixed_low_pass_valid_ = false;
308  return split_channels_low_.get()
309      ? split_channels_low_->fbuf()->channel(channel)
310      : data_f(channel);
311}
312
313const float* const* AudioBuffer::low_pass_split_channels_f() const {
314  return split_channels_low_.get()
315      ? split_channels_low_->fbuf_const()->channels()
316      : channels_f();
317}
318
319float* const* AudioBuffer::low_pass_split_channels_f() {
320  mixed_low_pass_valid_ = false;
321  return split_channels_low_.get()
322      ? split_channels_low_->fbuf()->channels()
323      : channels_f();
324}
325
326const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
327  return split_channels_high_.get()
328      ? split_channels_high_->ibuf_const()->channel(channel)
329      : NULL;
330}
331
332int16_t* AudioBuffer::high_pass_split_data(int channel) {
333  return split_channels_high_.get()
334      ? split_channels_high_->ibuf()->channel(channel)
335      : NULL;
336}
337
338const float* AudioBuffer::high_pass_split_data_f(int channel) const {
339  return split_channels_high_.get()
340      ? split_channels_high_->fbuf_const()->channel(channel)
341      : NULL;
342}
343
344float* AudioBuffer::high_pass_split_data_f(int channel) {
345  return split_channels_high_.get()
346      ? split_channels_high_->fbuf()->channel(channel)
347      : NULL;
348}
349
350const float* const* AudioBuffer::high_pass_split_channels_f() const {
351  return split_channels_high_.get()
352      ? split_channels_high_->fbuf_const()->channels()
353      : NULL;
354}
355
356float* const* AudioBuffer::high_pass_split_channels_f() {
357  return split_channels_high_.get()
358      ? split_channels_high_->fbuf()->channels()
359      : NULL;
360}
361
362const int16_t* AudioBuffer::mixed_low_pass_data() {
363  // Currently only mixing stereo to mono is supported.
364  assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
365
366  if (num_proc_channels_ == 1) {
367    return low_pass_split_data(0);
368  }
369
370  if (!mixed_low_pass_valid_) {
371    if (!mixed_low_pass_channels_.get()) {
372      mixed_low_pass_channels_.reset(
373          new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
374    }
375    StereoToMono(low_pass_split_data(0),
376                 low_pass_split_data(1),
377                 mixed_low_pass_channels_->data(),
378                 samples_per_split_channel_);
379    mixed_low_pass_valid_ = true;
380  }
381  return mixed_low_pass_channels_->data();
382}
383
384const int16_t* AudioBuffer::low_pass_reference(int channel) const {
385  if (!reference_copied_) {
386    return NULL;
387  }
388
389  return low_pass_reference_channels_->channel(channel);
390}
391
392const float* AudioBuffer::keyboard_data() const {
393  return keyboard_data_;
394}
395
396SplitFilterStates* AudioBuffer::filter_states(int channel) {
397  assert(channel >= 0 && channel < num_proc_channels_);
398  return &filter_states_[channel];
399}
400
401void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
402  activity_ = activity;
403}
404
405AudioFrame::VADActivity AudioBuffer::activity() const {
406  return activity_;
407}
408
409int AudioBuffer::num_channels() const {
410  return num_proc_channels_;
411}
412
413int AudioBuffer::samples_per_channel() const {
414  return proc_samples_per_channel_;
415}
416
417int AudioBuffer::samples_per_split_channel() const {
418  return samples_per_split_channel_;
419}
420
421int AudioBuffer::samples_per_keyboard_channel() const {
422  // We don't resample the keyboard channel.
423  return input_samples_per_channel_;
424}
425
426// TODO(andrew): Do deinterleaving and mixing in one step?
427void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
428  assert(proc_samples_per_channel_ == input_samples_per_channel_);
429  assert(frame->num_channels_ == num_input_channels_);
430  assert(frame->samples_per_channel_ ==  proc_samples_per_channel_);
431  InitForNewData();
432  activity_ = frame->vad_activity_;
433
434  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
435    // Downmix directly; no explicit deinterleaving needed.
436    int16_t* downmixed = channels_->ibuf()->channel(0);
437    for (int i = 0; i < input_samples_per_channel_; ++i) {
438      downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
439    }
440  } else {
441    assert(num_proc_channels_ == num_input_channels_);
442    int16_t* interleaved = frame->data_;
443    for (int i = 0; i < num_proc_channels_; ++i) {
444      int16_t* deinterleaved = channels_->ibuf()->channel(i);
445      int interleaved_idx = i;
446      for (int j = 0; j < proc_samples_per_channel_; ++j) {
447        deinterleaved[j] = interleaved[interleaved_idx];
448        interleaved_idx += num_proc_channels_;
449      }
450    }
451  }
452}
453
454void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
455  assert(proc_samples_per_channel_ == output_samples_per_channel_);
456  assert(num_proc_channels_ == num_input_channels_);
457  assert(frame->num_channels_ == num_proc_channels_);
458  assert(frame->samples_per_channel_ == proc_samples_per_channel_);
459  frame->vad_activity_ = activity_;
460
461  if (!data_changed) {
462    return;
463  }
464
465  int16_t* interleaved = frame->data_;
466  for (int i = 0; i < num_proc_channels_; i++) {
467    int16_t* deinterleaved = channels_->ibuf()->channel(i);
468    int interleaved_idx = i;
469    for (int j = 0; j < proc_samples_per_channel_; j++) {
470      interleaved[interleaved_idx] = deinterleaved[j];
471      interleaved_idx += num_proc_channels_;
472    }
473  }
474}
475
476void AudioBuffer::CopyLowPassToReference() {
477  reference_copied_ = true;
478  if (!low_pass_reference_channels_.get()) {
479    low_pass_reference_channels_.reset(
480        new ChannelBuffer<int16_t>(samples_per_split_channel_,
481                                   num_proc_channels_));
482  }
483  for (int i = 0; i < num_proc_channels_; i++) {
484    low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
485  }
486}
487
488}  // namespace webrtc
489