// audio_buffer.cc revision 8328e7c44d59bb9fcbc7f8a033beb3d073929518
1/* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "webrtc/modules/audio_processing/audio_buffer.h" 12 13#include "webrtc/common_audio/include/audio_util.h" 14#include "webrtc/common_audio/resampler/push_sinc_resampler.h" 15#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 17namespace webrtc { 18namespace { 19 20enum { 21 kSamplesPer8kHzChannel = 80, 22 kSamplesPer16kHzChannel = 160, 23 kSamplesPer32kHzChannel = 320 24}; 25 26bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) { 27 switch (layout) { 28 case AudioProcessing::kMono: 29 case AudioProcessing::kStereo: 30 return false; 31 case AudioProcessing::kMonoAndKeyboard: 32 case AudioProcessing::kStereoAndKeyboard: 33 return true; 34 } 35 assert(false); 36 return false; 37} 38 39int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) { 40 switch (layout) { 41 case AudioProcessing::kMono: 42 case AudioProcessing::kStereo: 43 assert(false); 44 return -1; 45 case AudioProcessing::kMonoAndKeyboard: 46 return 1; 47 case AudioProcessing::kStereoAndKeyboard: 48 return 2; 49 } 50 assert(false); 51 return -1; 52} 53 54template <typename T> 55void StereoToMono(const T* left, const T* right, T* out, 56 int samples_per_channel) { 57 for (int i = 0; i < samples_per_channel; ++i) 58 out[i] = (left[i] + right[i]) / 2; 59} 60 61} // namespace 62 63// One int16_t and one float ChannelBuffer that are kept in sync. The sync is 64// broken when someone requests write access to either ChannelBuffer, and 65// reestablished when someone requests the outdated ChannelBuffer. 
It is 66// therefore safe to use the return value of ibuf_const() and fbuf_const() 67// until the next call to ibuf() or fbuf(), and the return value of ibuf() and 68// fbuf() until the next call to any of the other functions. 69class IFChannelBuffer { 70 public: 71 IFChannelBuffer(int samples_per_channel, int num_channels) 72 : ivalid_(true), 73 ibuf_(samples_per_channel, num_channels), 74 fvalid_(true), 75 fbuf_(samples_per_channel, num_channels) {} 76 77 ChannelBuffer<int16_t>* ibuf() { return ibuf(false); } 78 ChannelBuffer<float>* fbuf() { return fbuf(false); } 79 const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); } 80 const ChannelBuffer<float>* fbuf_const() { return fbuf(true); } 81 82 private: 83 ChannelBuffer<int16_t>* ibuf(bool readonly) { 84 RefreshI(); 85 fvalid_ = readonly; 86 return &ibuf_; 87 } 88 89 ChannelBuffer<float>* fbuf(bool readonly) { 90 RefreshF(); 91 ivalid_ = readonly; 92 return &fbuf_; 93 } 94 95 void RefreshF() { 96 if (!fvalid_) { 97 assert(ivalid_); 98 const int16_t* const int_data = ibuf_.data(); 99 float* const float_data = fbuf_.data(); 100 const int length = fbuf_.length(); 101 for (int i = 0; i < length; ++i) 102 float_data[i] = int_data[i]; 103 fvalid_ = true; 104 } 105 } 106 107 void RefreshI() { 108 if (!ivalid_) { 109 assert(fvalid_); 110 FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data()); 111 ivalid_ = true; 112 } 113 } 114 115 bool ivalid_; 116 ChannelBuffer<int16_t> ibuf_; 117 bool fvalid_; 118 ChannelBuffer<float> fbuf_; 119}; 120 121AudioBuffer::AudioBuffer(int input_samples_per_channel, 122 int num_input_channels, 123 int process_samples_per_channel, 124 int num_process_channels, 125 int output_samples_per_channel) 126 : input_samples_per_channel_(input_samples_per_channel), 127 num_input_channels_(num_input_channels), 128 proc_samples_per_channel_(process_samples_per_channel), 129 num_proc_channels_(num_process_channels), 130 output_samples_per_channel_(output_samples_per_channel), 131 
samples_per_split_channel_(proc_samples_per_channel_), 132 mixed_low_pass_valid_(false), 133 reference_copied_(false), 134 activity_(AudioFrame::kVadUnknown), 135 keyboard_data_(NULL), 136 channels_(new IFChannelBuffer(proc_samples_per_channel_, 137 num_proc_channels_)) { 138 assert(input_samples_per_channel_ > 0); 139 assert(proc_samples_per_channel_ > 0); 140 assert(output_samples_per_channel_ > 0); 141 assert(num_input_channels_ > 0 && num_input_channels_ <= 2); 142 assert(num_proc_channels_ <= num_input_channels); 143 144 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 145 input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_, 146 num_proc_channels_)); 147 } 148 149 if (input_samples_per_channel_ != proc_samples_per_channel_ || 150 output_samples_per_channel_ != proc_samples_per_channel_) { 151 // Create an intermediate buffer for resampling. 152 process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_, 153 num_proc_channels_)); 154 } 155 156 if (input_samples_per_channel_ != proc_samples_per_channel_) { 157 input_resamplers_.reserve(num_proc_channels_); 158 for (int i = 0; i < num_proc_channels_; ++i) { 159 input_resamplers_.push_back( 160 new PushSincResampler(input_samples_per_channel_, 161 proc_samples_per_channel_)); 162 } 163 } 164 165 if (output_samples_per_channel_ != proc_samples_per_channel_) { 166 output_resamplers_.reserve(num_proc_channels_); 167 for (int i = 0; i < num_proc_channels_; ++i) { 168 output_resamplers_.push_back( 169 new PushSincResampler(proc_samples_per_channel_, 170 output_samples_per_channel_)); 171 } 172 } 173 174 if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) { 175 samples_per_split_channel_ = kSamplesPer16kHzChannel; 176 split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_, 177 num_proc_channels_)); 178 split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_, 179 num_proc_channels_)); 180 filter_states_.reset(new 
SplitFilterStates[num_proc_channels_]); 181 } 182} 183 184AudioBuffer::~AudioBuffer() {} 185 186void AudioBuffer::CopyFrom(const float* const* data, 187 int samples_per_channel, 188 AudioProcessing::ChannelLayout layout) { 189 assert(samples_per_channel == input_samples_per_channel_); 190 assert(ChannelsFromLayout(layout) == num_input_channels_); 191 InitForNewData(); 192 193 if (HasKeyboardChannel(layout)) { 194 keyboard_data_ = data[KeyboardChannelIndex(layout)]; 195 } 196 197 // Downmix. 198 const float* const* data_ptr = data; 199 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 200 StereoToMono(data[0], 201 data[1], 202 input_buffer_->channel(0), 203 input_samples_per_channel_); 204 data_ptr = input_buffer_->channels(); 205 } 206 207 // Resample. 208 if (input_samples_per_channel_ != proc_samples_per_channel_) { 209 for (int i = 0; i < num_proc_channels_; ++i) { 210 input_resamplers_[i]->Resample(data_ptr[i], 211 input_samples_per_channel_, 212 process_buffer_->channel(i), 213 proc_samples_per_channel_); 214 } 215 data_ptr = process_buffer_->channels(); 216 } 217 218 // Convert to the S16 range. 219 for (int i = 0; i < num_proc_channels_; ++i) { 220 FloatToFloatS16(data_ptr[i], proc_samples_per_channel_, 221 channels_->fbuf()->channel(i)); 222 } 223} 224 225void AudioBuffer::CopyTo(int samples_per_channel, 226 AudioProcessing::ChannelLayout layout, 227 float* const* data) { 228 assert(samples_per_channel == output_samples_per_channel_); 229 assert(ChannelsFromLayout(layout) == num_proc_channels_); 230 231 // Convert to the float range. 232 float* const* data_ptr = data; 233 if (output_samples_per_channel_ != proc_samples_per_channel_) { 234 // Convert to an intermediate buffer for subsequent resampling. 235 data_ptr = process_buffer_->channels(); 236 } 237 for (int i = 0; i < num_proc_channels_; ++i) { 238 FloatS16ToFloat(channels_->fbuf()->channel(i), proc_samples_per_channel_, 239 data_ptr[i]); 240 } 241 242 // Resample. 
243 if (output_samples_per_channel_ != proc_samples_per_channel_) { 244 for (int i = 0; i < num_proc_channels_; ++i) { 245 output_resamplers_[i]->Resample(data_ptr[i], 246 proc_samples_per_channel_, 247 data[i], 248 output_samples_per_channel_); 249 } 250 } 251} 252 253void AudioBuffer::InitForNewData() { 254 keyboard_data_ = NULL; 255 mixed_low_pass_valid_ = false; 256 reference_copied_ = false; 257 activity_ = AudioFrame::kVadUnknown; 258} 259 260const int16_t* AudioBuffer::data(int channel) const { 261 return channels_->ibuf_const()->channel(channel); 262} 263 264int16_t* AudioBuffer::data(int channel) { 265 mixed_low_pass_valid_ = false; 266 return channels_->ibuf()->channel(channel); 267} 268 269const float* AudioBuffer::data_f(int channel) const { 270 return channels_->fbuf_const()->channel(channel); 271} 272 273float* AudioBuffer::data_f(int channel) { 274 mixed_low_pass_valid_ = false; 275 return channels_->fbuf()->channel(channel); 276} 277 278const float* const* AudioBuffer::channels_f() const { 279 return channels_->fbuf_const()->channels(); 280} 281 282float* const* AudioBuffer::channels_f() { 283 mixed_low_pass_valid_ = false; 284 return channels_->fbuf()->channels(); 285} 286 287const int16_t* AudioBuffer::low_pass_split_data(int channel) const { 288 return split_channels_low_.get() 289 ? split_channels_low_->ibuf_const()->channel(channel) 290 : data(channel); 291} 292 293int16_t* AudioBuffer::low_pass_split_data(int channel) { 294 mixed_low_pass_valid_ = false; 295 return split_channels_low_.get() 296 ? split_channels_low_->ibuf()->channel(channel) 297 : data(channel); 298} 299 300const float* AudioBuffer::low_pass_split_data_f(int channel) const { 301 return split_channels_low_.get() 302 ? split_channels_low_->fbuf_const()->channel(channel) 303 : data_f(channel); 304} 305 306float* AudioBuffer::low_pass_split_data_f(int channel) { 307 mixed_low_pass_valid_ = false; 308 return split_channels_low_.get() 309 ? 
split_channels_low_->fbuf()->channel(channel) 310 : data_f(channel); 311} 312 313const float* const* AudioBuffer::low_pass_split_channels_f() const { 314 return split_channels_low_.get() 315 ? split_channels_low_->fbuf_const()->channels() 316 : channels_f(); 317} 318 319float* const* AudioBuffer::low_pass_split_channels_f() { 320 mixed_low_pass_valid_ = false; 321 return split_channels_low_.get() 322 ? split_channels_low_->fbuf()->channels() 323 : channels_f(); 324} 325 326const int16_t* AudioBuffer::high_pass_split_data(int channel) const { 327 return split_channels_high_.get() 328 ? split_channels_high_->ibuf_const()->channel(channel) 329 : NULL; 330} 331 332int16_t* AudioBuffer::high_pass_split_data(int channel) { 333 return split_channels_high_.get() 334 ? split_channels_high_->ibuf()->channel(channel) 335 : NULL; 336} 337 338const float* AudioBuffer::high_pass_split_data_f(int channel) const { 339 return split_channels_high_.get() 340 ? split_channels_high_->fbuf_const()->channel(channel) 341 : NULL; 342} 343 344float* AudioBuffer::high_pass_split_data_f(int channel) { 345 return split_channels_high_.get() 346 ? split_channels_high_->fbuf()->channel(channel) 347 : NULL; 348} 349 350const float* const* AudioBuffer::high_pass_split_channels_f() const { 351 return split_channels_high_.get() 352 ? split_channels_high_->fbuf_const()->channels() 353 : NULL; 354} 355 356float* const* AudioBuffer::high_pass_split_channels_f() { 357 return split_channels_high_.get() 358 ? split_channels_high_->fbuf()->channels() 359 : NULL; 360} 361 362const int16_t* AudioBuffer::mixed_low_pass_data() { 363 // Currently only mixing stereo to mono is supported. 
364 assert(num_proc_channels_ == 1 || num_proc_channels_ == 2); 365 366 if (num_proc_channels_ == 1) { 367 return low_pass_split_data(0); 368 } 369 370 if (!mixed_low_pass_valid_) { 371 if (!mixed_low_pass_channels_.get()) { 372 mixed_low_pass_channels_.reset( 373 new ChannelBuffer<int16_t>(samples_per_split_channel_, 1)); 374 } 375 StereoToMono(low_pass_split_data(0), 376 low_pass_split_data(1), 377 mixed_low_pass_channels_->data(), 378 samples_per_split_channel_); 379 mixed_low_pass_valid_ = true; 380 } 381 return mixed_low_pass_channels_->data(); 382} 383 384const int16_t* AudioBuffer::low_pass_reference(int channel) const { 385 if (!reference_copied_) { 386 return NULL; 387 } 388 389 return low_pass_reference_channels_->channel(channel); 390} 391 392const float* AudioBuffer::keyboard_data() const { 393 return keyboard_data_; 394} 395 396SplitFilterStates* AudioBuffer::filter_states(int channel) { 397 assert(channel >= 0 && channel < num_proc_channels_); 398 return &filter_states_[channel]; 399} 400 401void AudioBuffer::set_activity(AudioFrame::VADActivity activity) { 402 activity_ = activity; 403} 404 405AudioFrame::VADActivity AudioBuffer::activity() const { 406 return activity_; 407} 408 409int AudioBuffer::num_channels() const { 410 return num_proc_channels_; 411} 412 413int AudioBuffer::samples_per_channel() const { 414 return proc_samples_per_channel_; 415} 416 417int AudioBuffer::samples_per_split_channel() const { 418 return samples_per_split_channel_; 419} 420 421int AudioBuffer::samples_per_keyboard_channel() const { 422 // We don't resample the keyboard channel. 423 return input_samples_per_channel_; 424} 425 426// TODO(andrew): Do deinterleaving and mixing in one step? 
427void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { 428 assert(proc_samples_per_channel_ == input_samples_per_channel_); 429 assert(frame->num_channels_ == num_input_channels_); 430 assert(frame->samples_per_channel_ == proc_samples_per_channel_); 431 InitForNewData(); 432 activity_ = frame->vad_activity_; 433 434 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 435 // Downmix directly; no explicit deinterleaving needed. 436 int16_t* downmixed = channels_->ibuf()->channel(0); 437 for (int i = 0; i < input_samples_per_channel_; ++i) { 438 downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2; 439 } 440 } else { 441 assert(num_proc_channels_ == num_input_channels_); 442 int16_t* interleaved = frame->data_; 443 for (int i = 0; i < num_proc_channels_; ++i) { 444 int16_t* deinterleaved = channels_->ibuf()->channel(i); 445 int interleaved_idx = i; 446 for (int j = 0; j < proc_samples_per_channel_; ++j) { 447 deinterleaved[j] = interleaved[interleaved_idx]; 448 interleaved_idx += num_proc_channels_; 449 } 450 } 451 } 452} 453 454void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { 455 assert(proc_samples_per_channel_ == output_samples_per_channel_); 456 assert(num_proc_channels_ == num_input_channels_); 457 assert(frame->num_channels_ == num_proc_channels_); 458 assert(frame->samples_per_channel_ == proc_samples_per_channel_); 459 frame->vad_activity_ = activity_; 460 461 if (!data_changed) { 462 return; 463 } 464 465 int16_t* interleaved = frame->data_; 466 for (int i = 0; i < num_proc_channels_; i++) { 467 int16_t* deinterleaved = channels_->ibuf()->channel(i); 468 int interleaved_idx = i; 469 for (int j = 0; j < proc_samples_per_channel_; j++) { 470 interleaved[interleaved_idx] = deinterleaved[j]; 471 interleaved_idx += num_proc_channels_; 472 } 473 } 474} 475 476void AudioBuffer::CopyLowPassToReference() { 477 reference_copied_ = true; 478 if (!low_pass_reference_channels_.get()) { 479 
low_pass_reference_channels_.reset( 480 new ChannelBuffer<int16_t>(samples_per_split_channel_, 481 num_proc_channels_)); 482 } 483 for (int i = 0; i < num_proc_channels_; i++) { 484 low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i); 485 } 486} 487 488} // namespace webrtc 489