// audio_buffer.cc revision 4cc763621eeeb29d0bf1d16d69b2f96d711ead2b
1/* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "webrtc/modules/audio_processing/audio_buffer.h" 12 13#include "webrtc/common_audio/include/audio_util.h" 14#include "webrtc/common_audio/resampler/push_sinc_resampler.h" 15#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 17namespace webrtc { 18namespace { 19 20enum { 21 kSamplesPer8kHzChannel = 80, 22 kSamplesPer16kHzChannel = 160, 23 kSamplesPer32kHzChannel = 320 24}; 25 26bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) { 27 switch (layout) { 28 case AudioProcessing::kMono: 29 case AudioProcessing::kStereo: 30 return false; 31 case AudioProcessing::kMonoAndKeyboard: 32 case AudioProcessing::kStereoAndKeyboard: 33 return true; 34 } 35 assert(false); 36 return false; 37} 38 39int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) { 40 switch (layout) { 41 case AudioProcessing::kMono: 42 case AudioProcessing::kStereo: 43 assert(false); 44 return -1; 45 case AudioProcessing::kMonoAndKeyboard: 46 return 1; 47 case AudioProcessing::kStereoAndKeyboard: 48 return 2; 49 } 50 assert(false); 51 return -1; 52} 53 54 55void StereoToMono(const float* left, const float* right, float* out, 56 int samples_per_channel) { 57 for (int i = 0; i < samples_per_channel; ++i) { 58 out[i] = (left[i] + right[i]) / 2; 59 } 60} 61 62void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out, 63 int samples_per_channel) { 64 for (int i = 0; i < samples_per_channel; ++i) { 65 out[i] = (left[i] + right[i]) >> 1; 66 } 67} 68 69} // namespace 70 71class SplitChannelBuffer { 72 public: 73 
SplitChannelBuffer(int samples_per_split_channel, int num_channels) 74 : low_(samples_per_split_channel, num_channels), 75 high_(samples_per_split_channel, num_channels) { 76 } 77 ~SplitChannelBuffer() {} 78 79 int16_t* low_channel(int i) { return low_.channel(i); } 80 int16_t* high_channel(int i) { return high_.channel(i); } 81 82 private: 83 ChannelBuffer<int16_t> low_; 84 ChannelBuffer<int16_t> high_; 85}; 86 87AudioBuffer::AudioBuffer(int input_samples_per_channel, 88 int num_input_channels, 89 int process_samples_per_channel, 90 int num_process_channels, 91 int output_samples_per_channel) 92 : input_samples_per_channel_(input_samples_per_channel), 93 num_input_channels_(num_input_channels), 94 proc_samples_per_channel_(process_samples_per_channel), 95 num_proc_channels_(num_process_channels), 96 output_samples_per_channel_(output_samples_per_channel), 97 samples_per_split_channel_(proc_samples_per_channel_), 98 num_mixed_channels_(0), 99 num_mixed_low_pass_channels_(0), 100 reference_copied_(false), 101 activity_(AudioFrame::kVadUnknown), 102 is_muted_(false), 103 data_(NULL), 104 keyboard_data_(NULL), 105 channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_, 106 num_proc_channels_)) { 107 assert(input_samples_per_channel_ > 0); 108 assert(proc_samples_per_channel_ > 0); 109 assert(output_samples_per_channel_ > 0); 110 assert(num_input_channels_ > 0 && num_input_channels_ <= 2); 111 assert(num_proc_channels_ <= num_input_channels); 112 113 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 114 input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_, 115 num_proc_channels_)); 116 } 117 118 if (input_samples_per_channel_ != proc_samples_per_channel_ || 119 output_samples_per_channel_ != proc_samples_per_channel_) { 120 // Create an intermediate buffer for resampling. 
121 process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_, 122 num_proc_channels_)); 123 } 124 125 if (input_samples_per_channel_ != proc_samples_per_channel_) { 126 input_resamplers_.reserve(num_proc_channels_); 127 for (int i = 0; i < num_proc_channels_; ++i) { 128 input_resamplers_.push_back( 129 new PushSincResampler(input_samples_per_channel_, 130 proc_samples_per_channel_)); 131 } 132 } 133 134 if (output_samples_per_channel_ != proc_samples_per_channel_) { 135 output_resamplers_.reserve(num_proc_channels_); 136 for (int i = 0; i < num_proc_channels_; ++i) { 137 output_resamplers_.push_back( 138 new PushSincResampler(proc_samples_per_channel_, 139 output_samples_per_channel_)); 140 } 141 } 142 143 if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) { 144 samples_per_split_channel_ = kSamplesPer16kHzChannel; 145 split_channels_.reset(new SplitChannelBuffer(samples_per_split_channel_, 146 num_proc_channels_)); 147 filter_states_.reset(new SplitFilterStates[num_proc_channels_]); 148 } 149} 150 151AudioBuffer::~AudioBuffer() {} 152 153void AudioBuffer::CopyFrom(const float* const* data, 154 int samples_per_channel, 155 AudioProcessing::ChannelLayout layout) { 156 assert(samples_per_channel == input_samples_per_channel_); 157 assert(ChannelsFromLayout(layout) == num_input_channels_); 158 InitForNewData(); 159 160 if (HasKeyboardChannel(layout)) { 161 keyboard_data_ = data[KeyboardChannelIndex(layout)]; 162 } 163 164 // Downmix. 165 const float* const* data_ptr = data; 166 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { 167 StereoToMono(data[0], 168 data[1], 169 input_buffer_->channel(0), 170 input_samples_per_channel_); 171 data_ptr = input_buffer_->channels(); 172 } 173 174 // Resample. 
175 if (input_samples_per_channel_ != proc_samples_per_channel_) { 176 for (int i = 0; i < num_proc_channels_; ++i) { 177 input_resamplers_[i]->Resample(data_ptr[i], 178 input_samples_per_channel_, 179 process_buffer_->channel(i), 180 proc_samples_per_channel_); 181 } 182 data_ptr = process_buffer_->channels(); 183 } 184 185 // Convert to int16. 186 for (int i = 0; i < num_proc_channels_; ++i) { 187 ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_, 188 channels_->channel(i)); 189 } 190} 191 192void AudioBuffer::CopyTo(int samples_per_channel, 193 AudioProcessing::ChannelLayout layout, 194 float* const* data) { 195 assert(samples_per_channel == output_samples_per_channel_); 196 assert(ChannelsFromLayout(layout) == num_proc_channels_); 197 198 // Convert to float. 199 float* const* data_ptr = data; 200 if (output_samples_per_channel_ != proc_samples_per_channel_) { 201 // Convert to an intermediate buffer for subsequent resampling. 202 data_ptr = process_buffer_->channels(); 203 } 204 for (int i = 0; i < num_proc_channels_; ++i) { 205 ScaleToFloat(channels_->channel(i), proc_samples_per_channel_, data_ptr[i]); 206 } 207 208 // Resample. 
209 if (output_samples_per_channel_ != proc_samples_per_channel_) { 210 for (int i = 0; i < num_proc_channels_; ++i) { 211 output_resamplers_[i]->Resample(data_ptr[i], 212 proc_samples_per_channel_, 213 data[i], 214 output_samples_per_channel_); 215 } 216 } 217} 218 219void AudioBuffer::InitForNewData() { 220 data_ = NULL; 221 keyboard_data_ = NULL; 222 num_mixed_channels_ = 0; 223 num_mixed_low_pass_channels_ = 0; 224 reference_copied_ = false; 225 activity_ = AudioFrame::kVadUnknown; 226 is_muted_ = false; 227} 228 229const int16_t* AudioBuffer::data(int channel) const { 230 assert(channel >= 0 && channel < num_proc_channels_); 231 if (data_ != NULL) { 232 assert(channel == 0 && num_proc_channels_ == 1); 233 return data_; 234 } 235 236 return channels_->channel(channel); 237} 238 239int16_t* AudioBuffer::data(int channel) { 240 const AudioBuffer* t = this; 241 return const_cast<int16_t*>(t->data(channel)); 242} 243 244const int16_t* AudioBuffer::low_pass_split_data(int channel) const { 245 assert(channel >= 0 && channel < num_proc_channels_); 246 if (split_channels_.get() == NULL) { 247 return data(channel); 248 } 249 250 return split_channels_->low_channel(channel); 251} 252 253int16_t* AudioBuffer::low_pass_split_data(int channel) { 254 const AudioBuffer* t = this; 255 return const_cast<int16_t*>(t->low_pass_split_data(channel)); 256} 257 258const int16_t* AudioBuffer::high_pass_split_data(int channel) const { 259 assert(channel >= 0 && channel < num_proc_channels_); 260 if (split_channels_.get() == NULL) { 261 return NULL; 262 } 263 264 return split_channels_->high_channel(channel); 265} 266 267int16_t* AudioBuffer::high_pass_split_data(int channel) { 268 const AudioBuffer* t = this; 269 return const_cast<int16_t*>(t->high_pass_split_data(channel)); 270} 271 272const int16_t* AudioBuffer::mixed_data(int channel) const { 273 assert(channel >= 0 && channel < num_mixed_channels_); 274 275 return mixed_channels_->channel(channel); 276} 277 278const int16_t* 
AudioBuffer::mixed_low_pass_data(int channel) const { 279 assert(channel >= 0 && channel < num_mixed_low_pass_channels_); 280 281 return mixed_low_pass_channels_->channel(channel); 282} 283 284const int16_t* AudioBuffer::low_pass_reference(int channel) const { 285 assert(channel >= 0 && channel < num_proc_channels_); 286 if (!reference_copied_) { 287 return NULL; 288 } 289 290 return low_pass_reference_channels_->channel(channel); 291} 292 293const float* AudioBuffer::keyboard_data() const { 294 return keyboard_data_; 295} 296 297SplitFilterStates* AudioBuffer::filter_states(int channel) { 298 assert(channel >= 0 && channel < num_proc_channels_); 299 return &filter_states_[channel]; 300} 301 302void AudioBuffer::set_activity(AudioFrame::VADActivity activity) { 303 activity_ = activity; 304} 305 306AudioFrame::VADActivity AudioBuffer::activity() const { 307 return activity_; 308} 309 310bool AudioBuffer::is_muted() const { 311 return is_muted_; 312} 313 314int AudioBuffer::num_channels() const { 315 return num_proc_channels_; 316} 317 318int AudioBuffer::samples_per_channel() const { 319 return proc_samples_per_channel_; 320} 321 322int AudioBuffer::samples_per_split_channel() const { 323 return samples_per_split_channel_; 324} 325 326int AudioBuffer::samples_per_keyboard_channel() const { 327 // We don't resample the keyboard channel. 328 return input_samples_per_channel_; 329} 330 331// TODO(andrew): Do deinterleaving and mixing in one step? 332void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { 333 assert(proc_samples_per_channel_ == input_samples_per_channel_); 334 assert(num_proc_channels_ == num_input_channels_); 335 assert(frame->num_channels_ == num_proc_channels_); 336 assert(frame->samples_per_channel_ == proc_samples_per_channel_); 337 InitForNewData(); 338 activity_ = frame->vad_activity_; 339 if (frame->energy_ == 0) { 340 is_muted_ = true; 341 } 342 343 if (num_proc_channels_ == 1) { 344 // We can get away with a pointer assignment in this case. 
345 data_ = frame->data_; 346 return; 347 } 348 349 int16_t* interleaved = frame->data_; 350 for (int i = 0; i < num_proc_channels_; i++) { 351 int16_t* deinterleaved = channels_->channel(i); 352 int interleaved_idx = i; 353 for (int j = 0; j < proc_samples_per_channel_; j++) { 354 deinterleaved[j] = interleaved[interleaved_idx]; 355 interleaved_idx += num_proc_channels_; 356 } 357 } 358} 359 360void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { 361 assert(proc_samples_per_channel_ == output_samples_per_channel_); 362 assert(num_proc_channels_ == num_input_channels_); 363 assert(frame->num_channels_ == num_proc_channels_); 364 assert(frame->samples_per_channel_ == proc_samples_per_channel_); 365 frame->vad_activity_ = activity_; 366 367 if (!data_changed) { 368 return; 369 } 370 371 if (num_proc_channels_ == 1) { 372 assert(data_ == frame->data_); 373 return; 374 } 375 376 int16_t* interleaved = frame->data_; 377 for (int i = 0; i < num_proc_channels_; i++) { 378 int16_t* deinterleaved = channels_->channel(i); 379 int interleaved_idx = i; 380 for (int j = 0; j < proc_samples_per_channel_; j++) { 381 interleaved[interleaved_idx] = deinterleaved[j]; 382 interleaved_idx += num_proc_channels_; 383 } 384 } 385} 386 387void AudioBuffer::CopyAndMix(int num_mixed_channels) { 388 // We currently only support the stereo to mono case. 389 assert(num_proc_channels_ == 2); 390 assert(num_mixed_channels == 1); 391 if (!mixed_channels_.get()) { 392 mixed_channels_.reset( 393 new ChannelBuffer<int16_t>(proc_samples_per_channel_, 394 num_mixed_channels)); 395 } 396 397 StereoToMono(channels_->channel(0), 398 channels_->channel(1), 399 mixed_channels_->channel(0), 400 proc_samples_per_channel_); 401 402 num_mixed_channels_ = num_mixed_channels; 403} 404 405void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) { 406 // We currently only support the stereo to mono case. 
407 assert(num_proc_channels_ == 2); 408 assert(num_mixed_channels == 1); 409 if (!mixed_low_pass_channels_.get()) { 410 mixed_low_pass_channels_.reset( 411 new ChannelBuffer<int16_t>(samples_per_split_channel_, 412 num_mixed_channels)); 413 } 414 415 StereoToMono(low_pass_split_data(0), 416 low_pass_split_data(1), 417 mixed_low_pass_channels_->channel(0), 418 samples_per_split_channel_); 419 420 num_mixed_low_pass_channels_ = num_mixed_channels; 421} 422 423void AudioBuffer::CopyLowPassToReference() { 424 reference_copied_ = true; 425 if (!low_pass_reference_channels_.get()) { 426 low_pass_reference_channels_.reset( 427 new ChannelBuffer<int16_t>(samples_per_split_channel_, 428 num_proc_channels_)); 429 } 430 for (int i = 0; i < num_proc_channels_; i++) { 431 low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i); 432 } 433} 434 435} // namespace webrtc 436