audio_buffer.cc revision bcfb4d0403d9e45e37a4d93de919c2a3df57ce50
1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/audio_buffer.h"
12
13#include "webrtc/common_audio/include/audio_util.h"
14#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16
17namespace webrtc {
18namespace {
19
// Per-channel sample counts associated with the 8, 16 and 32 kHz rates.
// NOTE(review): the values match 10 ms of audio at the named rate (e.g. 80
// samples at 8 kHz) -- confirm against the callers.
enum {
  kSamplesPer8kHzChannel = 80,
  kSamplesPer16kHzChannel = 2 * kSamplesPer8kHzChannel,  // 160
  kSamplesPer32kHzChannel = 2 * kSamplesPer16kHzChannel  // 320
};
25
26bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
27  switch (layout) {
28    case AudioProcessing::kMono:
29    case AudioProcessing::kStereo:
30      return false;
31    case AudioProcessing::kMonoAndKeyboard:
32    case AudioProcessing::kStereoAndKeyboard:
33      return true;
34  }
35  assert(false);
36  return false;
37}
38
39int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
40  switch (layout) {
41    case AudioProcessing::kMono:
42    case AudioProcessing::kStereo:
43      assert(false);
44      return -1;
45    case AudioProcessing::kMonoAndKeyboard:
46      return 1;
47    case AudioProcessing::kStereoAndKeyboard:
48      return 2;
49  }
50  assert(false);
51  return -1;
52}
53
// Averages |left| and |right| sample by sample into |out|. All three arrays
// are accessed for |samples_per_channel| elements; nothing is written when
// the count is not positive.
void StereoToMono(const float* left, const float* right, float* out,
                  int samples_per_channel) {
  for (int remaining = samples_per_channel; remaining > 0; --remaining) {
    *out++ = (*left++ + *right++) / 2;
  }
}
60
// Integer counterpart of the float downmix: each output sample is the sum of
// the two input samples shifted right by one bit. The sum is formed after
// integer promotion, so it cannot overflow before the shift.
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                  int samples_per_channel) {
  for (int remaining = samples_per_channel; remaining > 0; --remaining) {
    *out++ = (*left++ + *right++) >> 1;
  }
}
67
68}  // namespace
69
70// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
71// broken when someone requests write access to either ChannelBuffer, and
72// reestablished when someone requests the outdated ChannelBuffer. It is
73// therefore safe to use the return value of ibuf_const() and fbuf_const()
74// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
75// fbuf() until the next call to any of the other functions.
76class IFChannelBuffer {
77 public:
78  IFChannelBuffer(int samples_per_channel, int num_channels)
79      : ivalid_(true),
80        ibuf_(samples_per_channel, num_channels),
81        fvalid_(true),
82        fbuf_(samples_per_channel, num_channels) {}
83
84  ChannelBuffer<int16_t>* ibuf() { return ibuf(false); }
85  ChannelBuffer<float>* fbuf() { return fbuf(false); }
86  const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); }
87  const ChannelBuffer<float>* fbuf_const() { return fbuf(true); }
88
89 private:
90  ChannelBuffer<int16_t>* ibuf(bool readonly) {
91    RefreshI();
92    fvalid_ = readonly;
93    return &ibuf_;
94  }
95
96  ChannelBuffer<float>* fbuf(bool readonly) {
97    RefreshF();
98    ivalid_ = readonly;
99    return &fbuf_;
100  }
101
102  void RefreshF() {
103    if (!fvalid_) {
104      assert(ivalid_);
105      const int16_t* const int_data = ibuf_.data();
106      float* const float_data = fbuf_.data();
107      const int length = fbuf_.length();
108      for (int i = 0; i < length; ++i)
109        float_data[i] = int_data[i];
110      fvalid_ = true;
111    }
112  }
113
114  void RefreshI() {
115    if (!ivalid_) {
116      assert(fvalid_);
117      const float* const float_data = fbuf_.data();
118      int16_t* const int_data = ibuf_.data();
119      const int length = ibuf_.length();
120      for (int i = 0; i < length; ++i)
121        int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
122                                     float_data[i],
123                                     std::numeric_limits<int16_t>::min());
124      ivalid_ = true;
125    }
126  }
127
128  bool ivalid_;
129  ChannelBuffer<int16_t> ibuf_;
130  bool fvalid_;
131  ChannelBuffer<float> fbuf_;
132};
133
134AudioBuffer::AudioBuffer(int input_samples_per_channel,
135                         int num_input_channels,
136                         int process_samples_per_channel,
137                         int num_process_channels,
138                         int output_samples_per_channel)
139  : input_samples_per_channel_(input_samples_per_channel),
140    num_input_channels_(num_input_channels),
141    proc_samples_per_channel_(process_samples_per_channel),
142    num_proc_channels_(num_process_channels),
143    output_samples_per_channel_(output_samples_per_channel),
144    samples_per_split_channel_(proc_samples_per_channel_),
145    mixed_low_pass_valid_(false),
146    reference_copied_(false),
147    activity_(AudioFrame::kVadUnknown),
148    keyboard_data_(NULL),
149    channels_(new IFChannelBuffer(proc_samples_per_channel_,
150                                  num_proc_channels_)) {
151  assert(input_samples_per_channel_ > 0);
152  assert(proc_samples_per_channel_ > 0);
153  assert(output_samples_per_channel_ > 0);
154  assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
155  assert(num_proc_channels_ <= num_input_channels);
156
157  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
158    input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
159                                                 num_proc_channels_));
160  }
161
162  if (input_samples_per_channel_ != proc_samples_per_channel_ ||
163      output_samples_per_channel_ != proc_samples_per_channel_) {
164    // Create an intermediate buffer for resampling.
165    process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
166                                                   num_proc_channels_));
167  }
168
169  if (input_samples_per_channel_ != proc_samples_per_channel_) {
170    input_resamplers_.reserve(num_proc_channels_);
171    for (int i = 0; i < num_proc_channels_; ++i) {
172      input_resamplers_.push_back(
173          new PushSincResampler(input_samples_per_channel_,
174                                proc_samples_per_channel_));
175    }
176  }
177
178  if (output_samples_per_channel_ != proc_samples_per_channel_) {
179    output_resamplers_.reserve(num_proc_channels_);
180    for (int i = 0; i < num_proc_channels_; ++i) {
181      output_resamplers_.push_back(
182          new PushSincResampler(proc_samples_per_channel_,
183                                output_samples_per_channel_));
184    }
185  }
186
187  if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
188    samples_per_split_channel_ = kSamplesPer16kHzChannel;
189    split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_,
190                                                  num_proc_channels_));
191    split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_,
192                                                   num_proc_channels_));
193    filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
194  }
195}
196
197AudioBuffer::~AudioBuffer() {}
198
199void AudioBuffer::CopyFrom(const float* const* data,
200                           int samples_per_channel,
201                           AudioProcessing::ChannelLayout layout) {
202  assert(samples_per_channel == input_samples_per_channel_);
203  assert(ChannelsFromLayout(layout) == num_input_channels_);
204  InitForNewData();
205
206  if (HasKeyboardChannel(layout)) {
207    keyboard_data_ = data[KeyboardChannelIndex(layout)];
208  }
209
210  // Downmix.
211  const float* const* data_ptr = data;
212  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
213    StereoToMono(data[0],
214                 data[1],
215                 input_buffer_->channel(0),
216                 input_samples_per_channel_);
217    data_ptr = input_buffer_->channels();
218  }
219
220  // Resample.
221  if (input_samples_per_channel_ != proc_samples_per_channel_) {
222    for (int i = 0; i < num_proc_channels_; ++i) {
223      input_resamplers_[i]->Resample(data_ptr[i],
224                                     input_samples_per_channel_,
225                                     process_buffer_->channel(i),
226                                     proc_samples_per_channel_);
227    }
228    data_ptr = process_buffer_->channels();
229  }
230
231  // Convert to int16.
232  for (int i = 0; i < num_proc_channels_; ++i) {
233    FloatToS16(data_ptr[i], proc_samples_per_channel_,
234               channels_->ibuf()->channel(i));
235  }
236}
237
238void AudioBuffer::CopyTo(int samples_per_channel,
239                         AudioProcessing::ChannelLayout layout,
240                         float* const* data) {
241  assert(samples_per_channel == output_samples_per_channel_);
242  assert(ChannelsFromLayout(layout) == num_proc_channels_);
243
244  // Convert to float.
245  float* const* data_ptr = data;
246  if (output_samples_per_channel_ != proc_samples_per_channel_) {
247    // Convert to an intermediate buffer for subsequent resampling.
248    data_ptr = process_buffer_->channels();
249  }
250  for (int i = 0; i < num_proc_channels_; ++i) {
251    S16ToFloat(channels_->ibuf()->channel(i),
252               proc_samples_per_channel_,
253               data_ptr[i]);
254  }
255
256  // Resample.
257  if (output_samples_per_channel_ != proc_samples_per_channel_) {
258    for (int i = 0; i < num_proc_channels_; ++i) {
259      output_resamplers_[i]->Resample(data_ptr[i],
260                                      proc_samples_per_channel_,
261                                      data[i],
262                                      output_samples_per_channel_);
263    }
264  }
265}
266
267void AudioBuffer::InitForNewData() {
268  keyboard_data_ = NULL;
269  mixed_low_pass_valid_ = false;
270  reference_copied_ = false;
271  activity_ = AudioFrame::kVadUnknown;
272}
273
274const int16_t* AudioBuffer::data(int channel) const {
275  return channels_->ibuf_const()->channel(channel);
276}
277
278int16_t* AudioBuffer::data(int channel) {
279  mixed_low_pass_valid_ = false;
280  return channels_->ibuf()->channel(channel);
281}
282
283const float* AudioBuffer::data_f(int channel) const {
284  return channels_->fbuf_const()->channel(channel);
285}
286
287float* AudioBuffer::data_f(int channel) {
288  mixed_low_pass_valid_ = false;
289  return channels_->fbuf()->channel(channel);
290}
291
292const float* const* AudioBuffer::channels_f() const {
293  return channels_->fbuf_const()->channels();
294}
295
296float* const* AudioBuffer::channels_f() {
297  mixed_low_pass_valid_ = false;
298  return channels_->fbuf()->channels();
299}
300
301const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
302  return split_channels_low_.get()
303      ? split_channels_low_->ibuf_const()->channel(channel)
304      : data(channel);
305}
306
307int16_t* AudioBuffer::low_pass_split_data(int channel) {
308  mixed_low_pass_valid_ = false;
309  return split_channels_low_.get()
310      ? split_channels_low_->ibuf()->channel(channel)
311      : data(channel);
312}
313
314const float* AudioBuffer::low_pass_split_data_f(int channel) const {
315  return split_channels_low_.get()
316      ? split_channels_low_->fbuf_const()->channel(channel)
317      : data_f(channel);
318}
319
320float* AudioBuffer::low_pass_split_data_f(int channel) {
321  mixed_low_pass_valid_ = false;
322  return split_channels_low_.get()
323      ? split_channels_low_->fbuf()->channel(channel)
324      : data_f(channel);
325}
326
327const float* const* AudioBuffer::low_pass_split_channels_f() const {
328  return split_channels_low_.get()
329      ? split_channels_low_->fbuf_const()->channels()
330      : channels_f();
331}
332
333float* const* AudioBuffer::low_pass_split_channels_f() {
334  mixed_low_pass_valid_ = false;
335  return split_channels_low_.get()
336      ? split_channels_low_->fbuf()->channels()
337      : channels_f();
338}
339
340const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
341  return split_channels_high_.get()
342      ? split_channels_high_->ibuf_const()->channel(channel)
343      : NULL;
344}
345
346int16_t* AudioBuffer::high_pass_split_data(int channel) {
347  return split_channels_high_.get()
348      ? split_channels_high_->ibuf()->channel(channel)
349      : NULL;
350}
351
352const float* AudioBuffer::high_pass_split_data_f(int channel) const {
353  return split_channels_high_.get()
354      ? split_channels_high_->fbuf_const()->channel(channel)
355      : NULL;
356}
357
358float* AudioBuffer::high_pass_split_data_f(int channel) {
359  return split_channels_high_.get()
360      ? split_channels_high_->fbuf()->channel(channel)
361      : NULL;
362}
363
364const float* const* AudioBuffer::high_pass_split_channels_f() const {
365  return split_channels_high_.get()
366      ? split_channels_high_->fbuf_const()->channels()
367      : NULL;
368}
369
370float* const* AudioBuffer::high_pass_split_channels_f() {
371  return split_channels_high_.get()
372      ? split_channels_high_->fbuf()->channels()
373      : NULL;
374}
375
376const int16_t* AudioBuffer::mixed_low_pass_data() {
377  // Currently only mixing stereo to mono is supported.
378  assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
379
380  if (num_proc_channels_ == 1) {
381    return low_pass_split_data(0);
382  }
383
384  if (!mixed_low_pass_valid_) {
385    if (!mixed_low_pass_channels_.get()) {
386      mixed_low_pass_channels_.reset(
387          new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
388    }
389    StereoToMono(low_pass_split_data(0),
390                 low_pass_split_data(1),
391                 mixed_low_pass_channels_->data(),
392                 samples_per_split_channel_);
393    mixed_low_pass_valid_ = true;
394  }
395  return mixed_low_pass_channels_->data();
396}
397
398const int16_t* AudioBuffer::low_pass_reference(int channel) const {
399  if (!reference_copied_) {
400    return NULL;
401  }
402
403  return low_pass_reference_channels_->channel(channel);
404}
405
406const float* AudioBuffer::keyboard_data() const {
407  return keyboard_data_;
408}
409
410SplitFilterStates* AudioBuffer::filter_states(int channel) {
411  assert(channel >= 0 && channel < num_proc_channels_);
412  return &filter_states_[channel];
413}
414
415void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
416  activity_ = activity;
417}
418
419AudioFrame::VADActivity AudioBuffer::activity() const {
420  return activity_;
421}
422
423int AudioBuffer::num_channels() const {
424  return num_proc_channels_;
425}
426
427int AudioBuffer::samples_per_channel() const {
428  return proc_samples_per_channel_;
429}
430
431int AudioBuffer::samples_per_split_channel() const {
432  return samples_per_split_channel_;
433}
434
435int AudioBuffer::samples_per_keyboard_channel() const {
436  // We don't resample the keyboard channel.
437  return input_samples_per_channel_;
438}
439
440// TODO(andrew): Do deinterleaving and mixing in one step?
441void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
442  assert(proc_samples_per_channel_ == input_samples_per_channel_);
443  assert(frame->num_channels_ == num_input_channels_);
444  assert(frame->samples_per_channel_ ==  proc_samples_per_channel_);
445  InitForNewData();
446  activity_ = frame->vad_activity_;
447
448  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
449    // Downmix directly; no explicit deinterleaving needed.
450    int16_t* downmixed = channels_->ibuf()->channel(0);
451    for (int i = 0; i < input_samples_per_channel_; ++i) {
452      // HACK(ajm): The downmixing in the int16_t path is in practice never
453      // called from production code. We do this weird scaling to and from float
454      // to satisfy tests checking for bit-exactness with the float path.
455      float downmix_float = (S16ToFloat(frame->data_[i * 2]) +
456                             S16ToFloat(frame->data_[i * 2 + 1])) / 2;
457      downmixed[i] = FloatToS16(downmix_float);
458    }
459  } else {
460    assert(num_proc_channels_ == num_input_channels_);
461    int16_t* interleaved = frame->data_;
462    for (int i = 0; i < num_proc_channels_; ++i) {
463      int16_t* deinterleaved = channels_->ibuf()->channel(i);
464      int interleaved_idx = i;
465      for (int j = 0; j < proc_samples_per_channel_; ++j) {
466        deinterleaved[j] = interleaved[interleaved_idx];
467        interleaved_idx += num_proc_channels_;
468      }
469    }
470  }
471}
472
473void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
474  assert(proc_samples_per_channel_ == output_samples_per_channel_);
475  assert(num_proc_channels_ == num_input_channels_);
476  assert(frame->num_channels_ == num_proc_channels_);
477  assert(frame->samples_per_channel_ == proc_samples_per_channel_);
478  frame->vad_activity_ = activity_;
479
480  if (!data_changed) {
481    return;
482  }
483
484  int16_t* interleaved = frame->data_;
485  for (int i = 0; i < num_proc_channels_; i++) {
486    int16_t* deinterleaved = channels_->ibuf()->channel(i);
487    int interleaved_idx = i;
488    for (int j = 0; j < proc_samples_per_channel_; j++) {
489      interleaved[interleaved_idx] = deinterleaved[j];
490      interleaved_idx += num_proc_channels_;
491    }
492  }
493}
494
495void AudioBuffer::CopyLowPassToReference() {
496  reference_copied_ = true;
497  if (!low_pass_reference_channels_.get()) {
498    low_pass_reference_channels_.reset(
499        new ChannelBuffer<int16_t>(samples_per_split_channel_,
500                                   num_proc_channels_));
501  }
502  for (int i = 0; i < num_proc_channels_; i++) {
503    low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
504  }
505}
506
507}  // namespace webrtc
508