audio_buffer.cc revision bcfb4d0403d9e45e37a4d93de919c2a3df57ce50
1/* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "webrtc/modules/audio_processing/audio_buffer.h" 12 13#include "webrtc/common_audio/include/audio_util.h" 14#include "webrtc/common_audio/resampler/push_sinc_resampler.h" 15#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 17namespace webrtc { 18namespace { 19 20enum { 21 kSamplesPer8kHzChannel = 80, 22 kSamplesPer16kHzChannel = 160, 23 kSamplesPer32kHzChannel = 320 24}; 25 26bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) { 27 switch (layout) { 28 case AudioProcessing::kMono: 29 case AudioProcessing::kStereo: 30 return false; 31 case AudioProcessing::kMonoAndKeyboard: 32 case AudioProcessing::kStereoAndKeyboard: 33 return true; 34 } 35 assert(false); 36 return false; 37} 38 39int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) { 40 switch (layout) { 41 case AudioProcessing::kMono: 42 case AudioProcessing::kStereo: 43 assert(false); 44 return -1; 45 case AudioProcessing::kMonoAndKeyboard: 46 return 1; 47 case AudioProcessing::kStereoAndKeyboard: 48 return 2; 49 } 50 assert(false); 51 return -1; 52} 53 54void StereoToMono(const float* left, const float* right, float* out, 55 int samples_per_channel) { 56 for (int i = 0; i < samples_per_channel; ++i) { 57 out[i] = (left[i] + right[i]) / 2; 58 } 59} 60 61void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out, 62 int samples_per_channel) { 63 for (int i = 0; i < samples_per_channel; ++i) { 64 out[i] = (left[i] + right[i]) >> 1; 65 } 66} 67 68} // namespace 69 70// One int16_t and one float ChannelBuffer that are kept in sync. The sync is 71// broken when someone requests write access to either ChannelBuffer, and 72// reestablished when someone requests the outdated ChannelBuffer. It is 73// therefore safe to use the return value of ibuf_const() and fbuf_const() 74// until the next call to ibuf() or fbuf(), and the return value of ibuf() and 75// fbuf() until the next call to any of the other functions. 76class IFChannelBuffer { 77 public: 78 IFChannelBuffer(int samples_per_channel, int num_channels) 79 : ivalid_(true), 80 ibuf_(samples_per_channel, num_channels), 81 fvalid_(true), 82 fbuf_(samples_per_channel, num_channels) {} 83 84 ChannelBuffer<int16_t>* ibuf() { return ibuf(false); } 85 ChannelBuffer<float>* fbuf() { return fbuf(false); } 86 const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); } 87 const ChannelBuffer<float>* fbuf_const() { return fbuf(true); } 88 89 private: 90 ChannelBuffer<int16_t>* ibuf(bool readonly) { 91 RefreshI(); 92 fvalid_ = readonly; 93 return &ibuf_; 94 } 95 96 ChannelBuffer<float>* fbuf(bool readonly) { 97 RefreshF(); 98 ivalid_ = readonly; 99 return &fbuf_; 100 } 101 102 void RefreshF() { 103 if (!fvalid_) { 104 assert(ivalid_); 105 const int16_t* const int_data = ibuf_.data(); 106 float* const float_data = fbuf_.data(); 107 const int length = fbuf_.length(); 108 for (int i = 0; i < length; ++i) 109 float_data[i] = int_data[i]; 110 fvalid_ = true; 111 } 112 } 113 114 void RefreshI() { 115 if (!ivalid_) { 116 assert(fvalid_); 117 const float* const float_data = fbuf_.data(); 118 int16_t* const int_data = ibuf_.data(); 119 const int length = ibuf_.length(); 120 for (int i = 0; i < length; ++i) 121 int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(), 122 float_data[i], 123 std::numeric_limits<int16_t>::min()); 124 ivalid_ = true; 125 } 126 } 127 128 bool ivalid_; 129 ChannelBuffer<int16_t> ibuf_; 130 bool fvalid_; 131 ChannelBuffer<float> fbuf_; 132}; 133 134AudioBuffer::AudioBuffer(int input_samples_per_channel, 135 int num_input_channels, 136 int process_samples_per_channel, 137 int num_process_channels, 138 int output_samples_per_channel) 139 : input_samples_per_channel_(input_samples_per_channel), 140 num_input_channels_(num_input_channels), 141 proc_samples_per_channel_(process_samples_per_channel), 142 num_proc_channels_(num_process_channels), 143 output_samples_per_channel_(output_samples_per_channel), 144 samples_per_split_channel_(proc_samples_per_channel_), 145 mixed_low_pass_valid_(false), 146 reference_copied_(false), 147 activity_(AudioFrame::kVadUnknown), 148 keyboard_data_(NULL), 149 channels_(new IFChannelBuffer(proc_samples_per_channel_, 150 num_proc_channels_)) { 151 assert(input_samples_per_channel_ > 0); 152 assert(proc_samples_per_channel_ > 0); 153 assert(output_samples_per_channel_ > 0); 154 assert(num_input_channels_ > 0 && num_input_channels_ <= 2); 155 assert(num_proc_channels_ <= num_input_channels); 156 157 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 158 input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_, 159 num_proc_channels_)); 160 } 161 162 if (input_samples_per_channel_ != proc_samples_per_channel_ || 163 output_samples_per_channel_ != proc_samples_per_channel_) { 164 // Create an intermediate buffer for resampling. 165 process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_, 166 num_proc_channels_)); 167 } 168 169 if (input_samples_per_channel_ != proc_samples_per_channel_) { 170 input_resamplers_.reserve(num_proc_channels_); 171 for (int i = 0; i < num_proc_channels_; ++i) { 172 input_resamplers_.push_back( 173 new PushSincResampler(input_samples_per_channel_, 174 proc_samples_per_channel_)); 175 } 176 } 177 178 if (output_samples_per_channel_ != proc_samples_per_channel_) { 179 output_resamplers_.reserve(num_proc_channels_); 180 for (int i = 0; i < num_proc_channels_; ++i) { 181 output_resamplers_.push_back( 182 new PushSincResampler(proc_samples_per_channel_, 183 output_samples_per_channel_)); 184 } 185 } 186 187 if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) { 188 samples_per_split_channel_ = kSamplesPer16kHzChannel; 189 split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_, 190 num_proc_channels_)); 191 split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_, 192 num_proc_channels_)); 193 filter_states_.reset(new SplitFilterStates[num_proc_channels_]); 194 } 195} 196 197AudioBuffer::~AudioBuffer() {} 198 199void AudioBuffer::CopyFrom(const float* const* data, 200 int samples_per_channel, 201 AudioProcessing::ChannelLayout layout) { 202 assert(samples_per_channel == input_samples_per_channel_); 203 assert(ChannelsFromLayout(layout) == num_input_channels_); 204 InitForNewData(); 205 206 if (HasKeyboardChannel(layout)) { 207 keyboard_data_ = data[KeyboardChannelIndex(layout)]; 208 } 209 210 // Downmix. 211 const float* const* data_ptr = data; 212 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 213 StereoToMono(data[0], 214 data[1], 215 input_buffer_->channel(0), 216 input_samples_per_channel_); 217 data_ptr = input_buffer_->channels(); 218 } 219 220 // Resample. 221 if (input_samples_per_channel_ != proc_samples_per_channel_) { 222 for (int i = 0; i < num_proc_channels_; ++i) { 223 input_resamplers_[i]->Resample(data_ptr[i], 224 input_samples_per_channel_, 225 process_buffer_->channel(i), 226 proc_samples_per_channel_); 227 } 228 data_ptr = process_buffer_->channels(); 229 } 230 231 // Convert to int16. 232 for (int i = 0; i < num_proc_channels_; ++i) { 233 FloatToS16(data_ptr[i], proc_samples_per_channel_, 234 channels_->ibuf()->channel(i)); 235 } 236} 237 238void AudioBuffer::CopyTo(int samples_per_channel, 239 AudioProcessing::ChannelLayout layout, 240 float* const* data) { 241 assert(samples_per_channel == output_samples_per_channel_); 242 assert(ChannelsFromLayout(layout) == num_proc_channels_); 243 244 // Convert to float. 245 float* const* data_ptr = data; 246 if (output_samples_per_channel_ != proc_samples_per_channel_) { 247 // Convert to an intermediate buffer for subsequent resampling. 248 data_ptr = process_buffer_->channels(); 249 } 250 for (int i = 0; i < num_proc_channels_; ++i) { 251 S16ToFloat(channels_->ibuf()->channel(i), 252 proc_samples_per_channel_, 253 data_ptr[i]); 254 } 255 256 // Resample. 257 if (output_samples_per_channel_ != proc_samples_per_channel_) { 258 for (int i = 0; i < num_proc_channels_; ++i) { 259 output_resamplers_[i]->Resample(data_ptr[i], 260 proc_samples_per_channel_, 261 data[i], 262 output_samples_per_channel_); 263 } 264 } 265} 266 267void AudioBuffer::InitForNewData() { 268 keyboard_data_ = NULL; 269 mixed_low_pass_valid_ = false; 270 reference_copied_ = false; 271 activity_ = AudioFrame::kVadUnknown; 272} 273 274const int16_t* AudioBuffer::data(int channel) const { 275 return channels_->ibuf_const()->channel(channel); 276} 277 278int16_t* AudioBuffer::data(int channel) { 279 mixed_low_pass_valid_ = false; 280 return channels_->ibuf()->channel(channel); 281} 282 283const float* AudioBuffer::data_f(int channel) const { 284 return channels_->fbuf_const()->channel(channel); 285} 286 287float* AudioBuffer::data_f(int channel) { 288 mixed_low_pass_valid_ = false; 289 return channels_->fbuf()->channel(channel); 290} 291 292const float* const* AudioBuffer::channels_f() const { 293 return channels_->fbuf_const()->channels(); 294} 295 296float* const* AudioBuffer::channels_f() { 297 mixed_low_pass_valid_ = false; 298 return channels_->fbuf()->channels(); 299} 300 301const int16_t* AudioBuffer::low_pass_split_data(int channel) const { 302 return split_channels_low_.get() 303 ? split_channels_low_->ibuf_const()->channel(channel) 304 : data(channel); 305} 306 307int16_t* AudioBuffer::low_pass_split_data(int channel) { 308 mixed_low_pass_valid_ = false; 309 return split_channels_low_.get() 310 ? split_channels_low_->ibuf()->channel(channel) 311 : data(channel); 312} 313 314const float* AudioBuffer::low_pass_split_data_f(int channel) const { 315 return split_channels_low_.get() 316 ? split_channels_low_->fbuf_const()->channel(channel) 317 : data_f(channel); 318} 319 320float* AudioBuffer::low_pass_split_data_f(int channel) { 321 mixed_low_pass_valid_ = false; 322 return split_channels_low_.get() 323 ? split_channels_low_->fbuf()->channel(channel) 324 : data_f(channel); 325} 326 327const float* const* AudioBuffer::low_pass_split_channels_f() const { 328 return split_channels_low_.get() 329 ? split_channels_low_->fbuf_const()->channels() 330 : channels_f(); 331} 332 333float* const* AudioBuffer::low_pass_split_channels_f() { 334 mixed_low_pass_valid_ = false; 335 return split_channels_low_.get() 336 ? split_channels_low_->fbuf()->channels() 337 : channels_f(); 338} 339 340const int16_t* AudioBuffer::high_pass_split_data(int channel) const { 341 return split_channels_high_.get() 342 ? split_channels_high_->ibuf_const()->channel(channel) 343 : NULL; 344} 345 346int16_t* AudioBuffer::high_pass_split_data(int channel) { 347 return split_channels_high_.get() 348 ? split_channels_high_->ibuf()->channel(channel) 349 : NULL; 350} 351 352const float* AudioBuffer::high_pass_split_data_f(int channel) const { 353 return split_channels_high_.get() 354 ? split_channels_high_->fbuf_const()->channel(channel) 355 : NULL; 356} 357 358float* AudioBuffer::high_pass_split_data_f(int channel) { 359 return split_channels_high_.get() 360 ? split_channels_high_->fbuf()->channel(channel) 361 : NULL; 362} 363 364const float* const* AudioBuffer::high_pass_split_channels_f() const { 365 return split_channels_high_.get() 366 ? split_channels_high_->fbuf_const()->channels() 367 : NULL; 368} 369 370float* const* AudioBuffer::high_pass_split_channels_f() { 371 return split_channels_high_.get() 372 ? split_channels_high_->fbuf()->channels() 373 : NULL; 374} 375 376const int16_t* AudioBuffer::mixed_low_pass_data() { 377 // Currently only mixing stereo to mono is supported. 378 assert(num_proc_channels_ == 1 || num_proc_channels_ == 2); 379 380 if (num_proc_channels_ == 1) { 381 return low_pass_split_data(0); 382 } 383 384 if (!mixed_low_pass_valid_) { 385 if (!mixed_low_pass_channels_.get()) { 386 mixed_low_pass_channels_.reset( 387 new ChannelBuffer<int16_t>(samples_per_split_channel_, 1)); 388 } 389 StereoToMono(low_pass_split_data(0), 390 low_pass_split_data(1), 391 mixed_low_pass_channels_->data(), 392 samples_per_split_channel_); 393 mixed_low_pass_valid_ = true; 394 } 395 return mixed_low_pass_channels_->data(); 396} 397 398const int16_t* AudioBuffer::low_pass_reference(int channel) const { 399 if (!reference_copied_) { 400 return NULL; 401 } 402 403 return low_pass_reference_channels_->channel(channel); 404} 405 406const float* AudioBuffer::keyboard_data() const { 407 return keyboard_data_; 408} 409 410SplitFilterStates* AudioBuffer::filter_states(int channel) { 411 assert(channel >= 0 && channel < num_proc_channels_); 412 return &filter_states_[channel]; 413} 414 415void AudioBuffer::set_activity(AudioFrame::VADActivity activity) { 416 activity_ = activity; 417} 418 419AudioFrame::VADActivity AudioBuffer::activity() const { 420 return activity_; 421} 422 423int AudioBuffer::num_channels() const { 424 return num_proc_channels_; 425} 426 427int AudioBuffer::samples_per_channel() const { 428 return proc_samples_per_channel_; 429} 430 431int AudioBuffer::samples_per_split_channel() const { 432 return samples_per_split_channel_; 433} 434 435int AudioBuffer::samples_per_keyboard_channel() const { 436 // We don't resample the keyboard channel. 437 return input_samples_per_channel_; 438} 439 440// TODO(andrew): Do deinterleaving and mixing in one step? 441void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { 442 assert(proc_samples_per_channel_ == input_samples_per_channel_); 443 assert(frame->num_channels_ == num_input_channels_); 444 assert(frame->samples_per_channel_ == proc_samples_per_channel_); 445 InitForNewData(); 446 activity_ = frame->vad_activity_; 447 448 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 449 // Downmix directly; no explicit deinterleaving needed. 450 int16_t* downmixed = channels_->ibuf()->channel(0); 451 for (int i = 0; i < input_samples_per_channel_; ++i) { 452 // HACK(ajm): The downmixing in the int16_t path is in practice never 453 // called from production code. We do this weird scaling to and from float 454 // to satisfy tests checking for bit-exactness with the float path. 455 float downmix_float = (S16ToFloat(frame->data_[i * 2]) + 456 S16ToFloat(frame->data_[i * 2 + 1])) / 2; 457 downmixed[i] = FloatToS16(downmix_float); 458 } 459 } else { 460 assert(num_proc_channels_ == num_input_channels_); 461 int16_t* interleaved = frame->data_; 462 for (int i = 0; i < num_proc_channels_; ++i) { 463 int16_t* deinterleaved = channels_->ibuf()->channel(i); 464 int interleaved_idx = i; 465 for (int j = 0; j < proc_samples_per_channel_; ++j) { 466 deinterleaved[j] = interleaved[interleaved_idx]; 467 interleaved_idx += num_proc_channels_; 468 } 469 } 470 } 471} 472 473void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { 474 assert(proc_samples_per_channel_ == output_samples_per_channel_); 475 assert(num_proc_channels_ == num_input_channels_); 476 assert(frame->num_channels_ == num_proc_channels_); 477 assert(frame->samples_per_channel_ == proc_samples_per_channel_); 478 frame->vad_activity_ = activity_; 479 480 if (!data_changed) { 481 return; 482 } 483 484 int16_t* interleaved = frame->data_; 485 for (int i = 0; i < num_proc_channels_; i++) { 486 int16_t* deinterleaved = channels_->ibuf()->channel(i); 487 int interleaved_idx = i; 488 for (int j = 0; j < proc_samples_per_channel_; j++) { 489 interleaved[interleaved_idx] = deinterleaved[j]; 490 interleaved_idx += num_proc_channels_; 491 } 492 } 493} 494 495void AudioBuffer::CopyLowPassToReference() { 496 reference_copied_ = true; 497 if (!low_pass_reference_channels_.get()) { 498 low_pass_reference_channels_.reset( 499 new ChannelBuffer<int16_t>(samples_per_split_channel_, 500 num_proc_channels_)); 501 } 502 for (int i = 0; i < num_proc_channels_; i++) { 503 low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i); 504 } 505} 506 507} // namespace webrtc 508