audio_buffer.cc revision 4cc763621eeeb29d0bf1d16d69b2f96d711ead2b
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_processing/audio_buffer.h"

#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

namespace webrtc {
namespace {

enum {
  kSamplesPer8kHzChannel = 80,
  kSamplesPer16kHzChannel = 160,
  kSamplesPer32kHzChannel = 320
};

bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      return false;
    case AudioProcessing::kMonoAndKeyboard:
    case AudioProcessing::kStereoAndKeyboard:
      return true;
  }
  assert(false);
  return false;
}

int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      assert(false);
      return -1;
    case AudioProcessing::kMonoAndKeyboard:
      return 1;
    case AudioProcessing::kStereoAndKeyboard:
      return 2;
  }
  assert(false);
  return -1;
}

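// Downmix helpers: average the left and right channels into a mono output
// (float and int16_t overloads).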
void StereoToMono(const float* left, const float* right, float* out,
                  int samples_per_channel) {
  for (int i = 0; i < samples_per_channel; ++i) {
    out[i] = (left[i] + right[i]) / 2;
  }
}

void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                  int samples_per_channel) {
  for (int i = 0; i < samples_per_channel; ++i) {
    out[i] = (left[i] + right[i]) >> 1;
  }
}

}  // namespace

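// Backing storage for the low and high bands of each channel, used when the
// 32 kHz processing signal is split into two 16 kHz bands.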
class SplitChannelBuffer {
 public:
  SplitChannelBuffer(int samples_per_split_channel, int num_channels)
      : low_(samples_per_split_channel, num_channels),
        high_(samples_per_split_channel, num_channels) {
  }
  ~SplitChannelBuffer() {}

  int16_t* low_channel(int i) { return low_.channel(i); }
  int16_t* high_channel(int i) { return high_.channel(i); }

 private:
  ChannelBuffer<int16_t> low_;
  ChannelBuffer<int16_t> high_;
};

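// The constructor allocates only what the configuration requires: a mono
// buffer when a stereo input is downmixed, an intermediate buffer and
// per-channel sinc resamplers when the input or output rate differs from the
// processing rate, and band-split buffers plus filter states when processing
// at 32 kHz.
//
// Illustrative construction for 10 ms frames, downmixing a stereo 48 kHz
// input to mono, processing at 32 kHz and producing 48 kHz output again
// (the values are an example only, not taken from calling code):
//
//   AudioBuffer capture(480,   // input samples per channel (48 kHz)
//                       2,     // input channels
//                       320,   // processing samples per channel (32 kHz)
//                       1,     // processing channels
//                       480);  // output samples per channel (48 kHz)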
AudioBuffer::AudioBuffer(int input_samples_per_channel,
                         int num_input_channels,
                         int process_samples_per_channel,
                         int num_process_channels,
                         int output_samples_per_channel)
  : input_samples_per_channel_(input_samples_per_channel),
    num_input_channels_(num_input_channels),
    proc_samples_per_channel_(process_samples_per_channel),
    num_proc_channels_(num_process_channels),
    output_samples_per_channel_(output_samples_per_channel),
    samples_per_split_channel_(proc_samples_per_channel_),
    num_mixed_channels_(0),
    num_mixed_low_pass_channels_(0),
    reference_copied_(false),
    activity_(AudioFrame::kVadUnknown),
    is_muted_(false),
    data_(NULL),
    keyboard_data_(NULL),
    channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
                                         num_proc_channels_)) {
  assert(input_samples_per_channel_ > 0);
  assert(proc_samples_per_channel_ > 0);
  assert(output_samples_per_channel_ > 0);
  assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
  assert(num_proc_channels_ <= num_input_channels);

  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
    input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
                                                 num_proc_channels_));
  }

  if (input_samples_per_channel_ != proc_samples_per_channel_ ||
      output_samples_per_channel_ != proc_samples_per_channel_) {
    // Create an intermediate buffer for resampling.
    process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
                                                   num_proc_channels_));
  }

  if (input_samples_per_channel_ != proc_samples_per_channel_) {
    input_resamplers_.reserve(num_proc_channels_);
    for (int i = 0; i < num_proc_channels_; ++i) {
      input_resamplers_.push_back(
          new PushSincResampler(input_samples_per_channel_,
                                proc_samples_per_channel_));
    }
  }

  if (output_samples_per_channel_ != proc_samples_per_channel_) {
    output_resamplers_.reserve(num_proc_channels_);
    for (int i = 0; i < num_proc_channels_; ++i) {
      output_resamplers_.push_back(
          new PushSincResampler(proc_samples_per_channel_,
                                output_samples_per_channel_));
    }
  }

  if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
    samples_per_split_channel_ = kSamplesPer16kHzChannel;
    split_channels_.reset(new SplitChannelBuffer(samples_per_split_channel_,
                                                 num_proc_channels_));
    filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
  }
}

AudioBuffer::~AudioBuffer() {}

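// Imports a deinterleaved float frame: the keyboard channel, if present, is
// referenced directly; stereo input is downmixed to mono when only one
// channel is processed; each channel is resampled to the processing rate if
// needed; and the result is converted to int16_t in |channels_|.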
void AudioBuffer::CopyFrom(const float* const* data,
                           int samples_per_channel,
                           AudioProcessing::ChannelLayout layout) {
  assert(samples_per_channel == input_samples_per_channel_);
  assert(ChannelsFromLayout(layout) == num_input_channels_);
  InitForNewData();

  if (HasKeyboardChannel(layout)) {
    keyboard_data_ = data[KeyboardChannelIndex(layout)];
  }

  // Downmix.
  const float* const* data_ptr = data;
  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
    StereoToMono(data[0],
                 data[1],
                 input_buffer_->channel(0),
                 input_samples_per_channel_);
    data_ptr = input_buffer_->channels();
  }

  // Resample.
  if (input_samples_per_channel_ != proc_samples_per_channel_) {
    for (int i = 0; i < num_proc_channels_; ++i) {
      input_resamplers_[i]->Resample(data_ptr[i],
                                     input_samples_per_channel_,
                                     process_buffer_->channel(i),
                                     proc_samples_per_channel_);
    }
    data_ptr = process_buffer_->channels();
  }

  // Convert to int16.
  for (int i = 0; i < num_proc_channels_; ++i) {
    ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
                         channels_->channel(i));
  }
}

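// Exports the processed audio: converts each channel from int16_t back to
// float and resamples from the processing rate to the output rate when the
// two differ.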
void AudioBuffer::CopyTo(int samples_per_channel,
                         AudioProcessing::ChannelLayout layout,
                         float* const* data) {
  assert(samples_per_channel == output_samples_per_channel_);
  assert(ChannelsFromLayout(layout) == num_proc_channels_);

  // Convert to float.
  float* const* data_ptr = data;
  if (output_samples_per_channel_ != proc_samples_per_channel_) {
    // Convert to an intermediate buffer for subsequent resampling.
    data_ptr = process_buffer_->channels();
  }
  for (int i = 0; i < num_proc_channels_; ++i) {
    ScaleToFloat(channels_->channel(i), proc_samples_per_channel_, data_ptr[i]);
  }

  // Resample.
  if (output_samples_per_channel_ != proc_samples_per_channel_) {
    for (int i = 0; i < num_proc_channels_; ++i) {
      output_resamplers_[i]->Resample(data_ptr[i],
                                      proc_samples_per_channel_,
                                      data[i],
                                      output_samples_per_channel_);
    }
  }
}

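// Resets the per-frame state (borrowed data pointers, mix counters, the
// reference flag, VAD activity and the mute flag) before a new frame is
// imported.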
void AudioBuffer::InitForNewData() {
  data_ = NULL;
  keyboard_data_ = NULL;
  num_mixed_channels_ = 0;
  num_mixed_low_pass_channels_ = 0;
  reference_copied_ = false;
  activity_ = AudioFrame::kVadUnknown;
  is_muted_ = false;
}

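// Per-channel accessors. data() returns the full-band samples; on the mono
// fast path in DeinterleaveFrom() it points directly into the source
// AudioFrame. low_pass_split_data() falls back to the full-band data when no
// split buffer exists, while high_pass_split_data() returns NULL in that
// case.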
const int16_t* AudioBuffer::data(int channel) const {
  assert(channel >= 0 && channel < num_proc_channels_);
  if (data_ != NULL) {
    assert(channel == 0 && num_proc_channels_ == 1);
    return data_;
  }

  return channels_->channel(channel);
}

int16_t* AudioBuffer::data(int channel) {
  const AudioBuffer* t = this;
  return const_cast<int16_t*>(t->data(channel));
}

const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
  assert(channel >= 0 && channel < num_proc_channels_);
  if (split_channels_.get() == NULL) {
    return data(channel);
  }

  return split_channels_->low_channel(channel);
}

int16_t* AudioBuffer::low_pass_split_data(int channel) {
  const AudioBuffer* t = this;
  return const_cast<int16_t*>(t->low_pass_split_data(channel));
}

const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
  assert(channel >= 0 && channel < num_proc_channels_);
  if (split_channels_.get() == NULL) {
    return NULL;
  }

  return split_channels_->high_channel(channel);
}

int16_t* AudioBuffer::high_pass_split_data(int channel) {
  const AudioBuffer* t = this;
  return const_cast<int16_t*>(t->high_pass_split_data(channel));
}

const int16_t* AudioBuffer::mixed_data(int channel) const {
  assert(channel >= 0 && channel < num_mixed_channels_);

  return mixed_channels_->channel(channel);
}

const int16_t* AudioBuffer::mixed_low_pass_data(int channel) const {
  assert(channel >= 0 && channel < num_mixed_low_pass_channels_);

  return mixed_low_pass_channels_->channel(channel);
}

const int16_t* AudioBuffer::low_pass_reference(int channel) const {
  assert(channel >= 0 && channel < num_proc_channels_);
  if (!reference_copied_) {
    return NULL;
  }

  return low_pass_reference_channels_->channel(channel);
}

const float* AudioBuffer::keyboard_data() const {
  return keyboard_data_;
}

SplitFilterStates* AudioBuffer::filter_states(int channel) {
  assert(channel >= 0 && channel < num_proc_channels_);
  return &filter_states_[channel];
}

void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
  activity_ = activity;
}

AudioFrame::VADActivity AudioBuffer::activity() const {
  return activity_;
}

bool AudioBuffer::is_muted() const {
  return is_muted_;
}

int AudioBuffer::num_channels() const {
  return num_proc_channels_;
}

int AudioBuffer::samples_per_channel() const {
  return proc_samples_per_channel_;
}

int AudioBuffer::samples_per_split_channel() const {
  return samples_per_split_channel_;
}

int AudioBuffer::samples_per_keyboard_channel() const {
  // We don't resample the keyboard channel.
  return input_samples_per_channel_;
}

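// Imports an interleaved int16_t AudioFrame: the VAD activity is copied, a
// zero-energy frame marks the buffer as muted, a mono frame is referenced in
// place through |data_|, and multi-channel frames are deinterleaved into
// |channels_|.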
// TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
  assert(proc_samples_per_channel_ == input_samples_per_channel_);
  assert(num_proc_channels_ == num_input_channels_);
  assert(frame->num_channels_ == num_proc_channels_);
  assert(frame->samples_per_channel_ == proc_samples_per_channel_);
  InitForNewData();
  activity_ = frame->vad_activity_;
  if (frame->energy_ == 0) {
    is_muted_ = true;
  }

  if (num_proc_channels_ == 1) {
    // We can get away with a pointer assignment in this case.
    data_ = frame->data_;
    return;
  }

  int16_t* interleaved = frame->data_;
  for (int i = 0; i < num_proc_channels_; i++) {
    int16_t* deinterleaved = channels_->channel(i);
    int interleaved_idx = i;
    for (int j = 0; j < proc_samples_per_channel_; j++) {
      deinterleaved[j] = interleaved[interleaved_idx];
      interleaved_idx += num_proc_channels_;
    }
  }
}

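// Writes the processed samples back into the interleaved AudioFrame and
// updates its VAD activity. No copy is made when the data is unchanged, or
// for mono frames where |data_| already aliases the frame's buffer.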
void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
  assert(proc_samples_per_channel_ == output_samples_per_channel_);
  assert(num_proc_channels_ == num_input_channels_);
  assert(frame->num_channels_ == num_proc_channels_);
  assert(frame->samples_per_channel_ == proc_samples_per_channel_);
  frame->vad_activity_ = activity_;

  if (!data_changed) {
    return;
  }

  if (num_proc_channels_ == 1) {
    assert(data_ == frame->data_);
    return;
  }

  int16_t* interleaved = frame->data_;
  for (int i = 0; i < num_proc_channels_; i++) {
    int16_t* deinterleaved = channels_->channel(i);
    int interleaved_idx = i;
    for (int j = 0; j < proc_samples_per_channel_; j++) {
      interleaved[interleaved_idx] = deinterleaved[j];
      interleaved_idx += num_proc_channels_;
    }
  }
}

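// Downmixes the full-band stereo data into |mixed_channels_|.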
void AudioBuffer::CopyAndMix(int num_mixed_channels) {
  // We currently only support the stereo to mono case.
  assert(num_proc_channels_ == 2);
  assert(num_mixed_channels == 1);
  if (!mixed_channels_.get()) {
    mixed_channels_.reset(
        new ChannelBuffer<int16_t>(proc_samples_per_channel_,
                                   num_mixed_channels));
  }

  StereoToMono(channels_->channel(0),
               channels_->channel(1),
               mixed_channels_->channel(0),
               proc_samples_per_channel_);

  num_mixed_channels_ = num_mixed_channels;
}

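// As CopyAndMix(), but mixes the low-band (split) data into
// |mixed_low_pass_channels_|.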
void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
  // We currently only support the stereo to mono case.
  assert(num_proc_channels_ == 2);
  assert(num_mixed_channels == 1);
  if (!mixed_low_pass_channels_.get()) {
    mixed_low_pass_channels_.reset(
        new ChannelBuffer<int16_t>(samples_per_split_channel_,
                                   num_mixed_channels));
  }

  StereoToMono(low_pass_split_data(0),
               low_pass_split_data(1),
               mixed_low_pass_channels_->channel(0),
               samples_per_split_channel_);

  num_mixed_low_pass_channels_ = num_mixed_channels;
}

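// Saves a copy of each channel's low-band data so that it can later be
// retrieved through low_pass_reference().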
void AudioBuffer::CopyLowPassToReference() {
  reference_copied_ = true;
  if (!low_pass_reference_channels_.get()) {
    low_pass_reference_channels_.reset(
        new ChannelBuffer<int16_t>(samples_per_split_channel_,
                                   num_proc_channels_));
  }
  for (int i = 0; i < num_proc_channels_; i++) {
    low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
  }
}

}  // namespace webrtc