1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "media/filters/audio_renderer_algorithm.h" 6 7#include <algorithm> 8#include <cmath> 9 10#include "base/logging.h" 11#include "base/memory/scoped_ptr.h" 12#include "media/base/audio_buffer.h" 13#include "media/base/audio_bus.h" 14#include "media/base/limits.h" 15#include "media/filters/wsola_internals.h" 16 17namespace media { 18 19 20// Waveform Similarity Overlap-and-add (WSOLA). 21// 22// One WSOLA iteration 23// 24// 1) Extract |target_block_| as input frames at indices 25// [|target_block_index_|, |target_block_index_| + |ola_window_size_|). 26// Note that |target_block_| is the "natural" continuation of the output. 27// 28// 2) Extract |search_block_| as input frames at indices 29// [|search_block_index_|, 30// |search_block_index_| + |num_candidate_blocks_| + |ola_window_size_|). 31// 32// 3) Find a block within the |search_block_| that is most similar 33// to |target_block_|. Let |optimal_index| be the index of such block and 34// write it to |optimal_block_|. 35// 36// 4) Update: 37// |optimal_block_| = |transition_window_| * |target_block_| + 38// (1 - |transition_window_|) * |optimal_block_|. 39// 40// 5) Overlap-and-add |optimal_block_| to the |wsola_output_|. 41// 42// 6) Update: 43// |target_block_| = |optimal_index| + |ola_window_size_| / 2. 44// |output_index_| = |output_index_| + |ola_window_size_| / 2, 45// |search_block_center_offset_| = |output_index_| * |playback_rate|, and 46// |search_block_index_| = |search_block_center_offset_| - 47// |search_block_center_offset_|. 48 49// Max/min supported playback rates for fast/slow audio. Audio outside of these 50// ranges are muted. 51// Audio at these speeds would sound better under a frequency domain algorithm. 52static const float kMinPlaybackRate = 0.5f; 53static const float kMaxPlaybackRate = 4.0f; 54 55// Overlap-and-add window size in milliseconds. 56static const int kOlaWindowSizeMs = 20; 57 58// Size of search interval in milliseconds. The search interval is 59// [-delta delta] around |output_index_| * |playback_rate|. So the search 60// interval is 2 * delta. 61static const int kWsolaSearchIntervalMs = 30; 62 63// The maximum size in seconds for the |audio_buffer_|. Arbitrarily determined. 64static const int kMaxCapacityInSeconds = 3; 65 66// The starting size in frames for |audio_buffer_|. Previous usage maintained a 67// queue of 16 AudioBuffers, each of 512 frames. This worked well, so we 68// maintain this number of frames. 69static const int kStartingBufferSizeInFrames = 16 * 512; 70 71COMPILE_ASSERT(kStartingBufferSizeInFrames < 72 (kMaxCapacityInSeconds * limits::kMinSampleRate), 73 max_capacity_smaller_than_starting_buffer_size); 74 75AudioRendererAlgorithm::AudioRendererAlgorithm() 76 : channels_(0), 77 samples_per_second_(0), 78 muted_partial_frame_(0), 79 capacity_(kStartingBufferSizeInFrames), 80 output_time_(0.0), 81 search_block_center_offset_(0), 82 search_block_index_(0), 83 num_candidate_blocks_(0), 84 target_block_index_(0), 85 ola_window_size_(0), 86 ola_hop_size_(0), 87 num_complete_frames_(0) { 88} 89 90AudioRendererAlgorithm::~AudioRendererAlgorithm() {} 91 92void AudioRendererAlgorithm::Initialize(const AudioParameters& params) { 93 CHECK(params.IsValid()); 94 95 channels_ = params.channels(); 96 samples_per_second_ = params.sample_rate(); 97 num_candidate_blocks_ = (kWsolaSearchIntervalMs * samples_per_second_) / 1000; 98 ola_window_size_ = kOlaWindowSizeMs * samples_per_second_ / 1000; 99 100 // Make sure window size in an even number. 101 ola_window_size_ += ola_window_size_ & 1; 102 ola_hop_size_ = ola_window_size_ / 2; 103 104 // |num_candidate_blocks_| / 2 is the offset of the center of the search 105 // block to the center of the first (left most) candidate block. The offset 106 // of the center of a candidate block to its left most point is 107 // |ola_window_size_| / 2 - 1. Note that |ola_window_size_| is even and in 108 // our convention the center belongs to the left half, so we need to subtract 109 // one frame to get the correct offset. 110 // 111 // Search Block 112 // <-------------------------------------------> 113 // 114 // |ola_window_size_| / 2 - 1 115 // <---- 116 // 117 // |num_candidate_blocks_| / 2 118 // <---------------- 119 // center 120 // X----X----------------X---------------X-----X 121 // <----------> <----------> 122 // Candidate ... Candidate 123 // 1, ... |num_candidate_blocks_| 124 search_block_center_offset_ = num_candidate_blocks_ / 2 + 125 (ola_window_size_ / 2 - 1); 126 127 ola_window_.reset(new float[ola_window_size_]); 128 internal::GetSymmetricHanningWindow(ola_window_size_, ola_window_.get()); 129 130 transition_window_.reset(new float[ola_window_size_ * 2]); 131 internal::GetSymmetricHanningWindow(2 * ola_window_size_, 132 transition_window_.get()); 133 134 wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_); 135 wsola_output_->Zero(); // Initialize for overlap-and-add of the first block. 136 137 // Auxiliary containers. 138 optimal_block_ = AudioBus::Create(channels_, ola_window_size_); 139 search_block_ = AudioBus::Create( 140 channels_, num_candidate_blocks_ + (ola_window_size_ - 1)); 141 target_block_ = AudioBus::Create(channels_, ola_window_size_); 142} 143 144int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, 145 int requested_frames, 146 float playback_rate) { 147 if (playback_rate == 0) 148 return 0; 149 150 DCHECK_EQ(channels_, dest->channels()); 151 152 // Optimize the muted case to issue a single clear instead of performing 153 // the full crossfade and clearing each crossfaded frame. 154 if (playback_rate < kMinPlaybackRate || playback_rate > kMaxPlaybackRate) { 155 int frames_to_render = 156 std::min(static_cast<int>(audio_buffer_.frames() / playback_rate), 157 requested_frames); 158 159 // Compute accurate number of frames to actually skip in the source data. 160 // Includes the leftover partial frame from last request. However, we can 161 // only skip over complete frames, so a partial frame may remain for next 162 // time. 163 muted_partial_frame_ += frames_to_render * playback_rate; 164 int seek_frames = static_cast<int>(muted_partial_frame_); 165 dest->ZeroFrames(frames_to_render); 166 audio_buffer_.SeekFrames(seek_frames); 167 168 // Determine the partial frame that remains to be skipped for next call. If 169 // the user switches back to playing, it may be off time by this partial 170 // frame, which would be undetectable. If they subsequently switch to 171 // another playback rate that mutes, the code will attempt to line up the 172 // frames again. 173 muted_partial_frame_ -= seek_frames; 174 return frames_to_render; 175 } 176 177 int slower_step = ceil(ola_window_size_ * playback_rate); 178 int faster_step = ceil(ola_window_size_ / playback_rate); 179 180 // Optimize the most common |playback_rate| ~= 1 case to use a single copy 181 // instead of copying frame by frame. 182 if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) { 183 const int frames_to_copy = 184 std::min(audio_buffer_.frames(), requested_frames); 185 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest); 186 DCHECK_EQ(frames_read, frames_to_copy); 187 return frames_read; 188 } 189 190 int rendered_frames = 0; 191 do { 192 rendered_frames += WriteCompletedFramesTo( 193 requested_frames - rendered_frames, rendered_frames, dest); 194 } while (rendered_frames < requested_frames && 195 RunOneWsolaIteration(playback_rate)); 196 return rendered_frames; 197} 198 199void AudioRendererAlgorithm::FlushBuffers() { 200 // Clear the queue of decoded packets (releasing the buffers). 201 audio_buffer_.Clear(); 202 output_time_ = 0.0; 203 search_block_index_ = 0; 204 target_block_index_ = 0; 205 wsola_output_->Zero(); 206 num_complete_frames_ = 0; 207 208 // Reset |capacity_| so growth triggered by underflows doesn't penalize 209 // seek time. 210 capacity_ = kStartingBufferSizeInFrames; 211} 212 213void AudioRendererAlgorithm::EnqueueBuffer( 214 const scoped_refptr<AudioBuffer>& buffer_in) { 215 DCHECK(!buffer_in->end_of_stream()); 216 audio_buffer_.Append(buffer_in); 217} 218 219bool AudioRendererAlgorithm::IsQueueFull() { 220 return audio_buffer_.frames() >= capacity_; 221} 222 223void AudioRendererAlgorithm::IncreaseQueueCapacity() { 224 int max_capacity = kMaxCapacityInSeconds * samples_per_second_; 225 DCHECK_LE(capacity_, max_capacity); 226 227 capacity_ = std::min(2 * capacity_, max_capacity); 228} 229 230bool AudioRendererAlgorithm::CanPerformWsola() const { 231 const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1); 232 const int frames = audio_buffer_.frames(); 233 return target_block_index_ + ola_window_size_ <= frames && 234 search_block_index_ + search_block_size <= frames; 235} 236 237bool AudioRendererAlgorithm::RunOneWsolaIteration(float playback_rate) { 238 if (!CanPerformWsola()) 239 return false; 240 241 GetOptimalBlock(); 242 243 // Overlap-and-add. 244 for (int k = 0; k < channels_; ++k) { 245 const float* const ch_opt_frame = optimal_block_->channel(k); 246 float* ch_output = wsola_output_->channel(k) + num_complete_frames_; 247 for (int n = 0; n < ola_hop_size_; ++n) { 248 ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] + 249 ch_opt_frame[n] * ola_window_[n]; 250 } 251 252 // Copy the second half to the output. 253 memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_], 254 sizeof(*ch_opt_frame) * ola_hop_size_); 255 } 256 257 num_complete_frames_ += ola_hop_size_; 258 UpdateOutputTime(playback_rate, ola_hop_size_); 259 RemoveOldInputFrames(playback_rate); 260 return true; 261} 262 263void AudioRendererAlgorithm::UpdateOutputTime(float playback_rate, 264 double time_change) { 265 output_time_ += time_change; 266 // Center of the search region, in frames. 267 const int search_block_center_index = static_cast<int>( 268 output_time_ * playback_rate + 0.5); 269 search_block_index_ = search_block_center_index - search_block_center_offset_; 270} 271 272void AudioRendererAlgorithm::RemoveOldInputFrames(float playback_rate) { 273 const int earliest_used_index = std::min(target_block_index_, 274 search_block_index_); 275 if (earliest_used_index <= 0) 276 return; // Nothing to remove. 277 278 // Remove frames from input and adjust indices accordingly. 279 audio_buffer_.SeekFrames(earliest_used_index); 280 target_block_index_ -= earliest_used_index; 281 282 // Adjust output index. 283 double output_time_change = static_cast<double>(earliest_used_index) / 284 playback_rate; 285 CHECK_GE(output_time_, output_time_change); 286 UpdateOutputTime(playback_rate, -output_time_change); 287} 288 289int AudioRendererAlgorithm::WriteCompletedFramesTo( 290 int requested_frames, int dest_offset, AudioBus* dest) { 291 int rendered_frames = std::min(num_complete_frames_, requested_frames); 292 293 if (rendered_frames == 0) 294 return 0; // There is nothing to read from |wsola_output_|, return. 295 296 wsola_output_->CopyPartialFramesTo(0, rendered_frames, dest_offset, dest); 297 298 // Remove the frames which are read. 299 int frames_to_move = wsola_output_->frames() - rendered_frames; 300 for (int k = 0; k < channels_; ++k) { 301 float* ch = wsola_output_->channel(k); 302 memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move); 303 } 304 num_complete_frames_ -= rendered_frames; 305 return rendered_frames; 306} 307 308bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const { 309 const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1); 310 311 return target_block_index_ >= search_block_index_ && 312 target_block_index_ + ola_window_size_ <= 313 search_block_index_ + search_block_size; 314} 315 316void AudioRendererAlgorithm::GetOptimalBlock() { 317 int optimal_index = 0; 318 319 // An interval around last optimal block which is excluded from the search. 320 // This is to reduce the buzzy sound. The number 160 is rather arbitrary and 321 // derived heuristically. 322 const int kExcludeIntervalLengthFrames = 160; 323 if (TargetIsWithinSearchRegion()) { 324 optimal_index = target_block_index_; 325 PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get()); 326 } else { 327 PeekAudioWithZeroPrepend(target_block_index_, target_block_.get()); 328 PeekAudioWithZeroPrepend(search_block_index_, search_block_.get()); 329 int last_optimal = target_block_index_ - ola_hop_size_ - 330 search_block_index_; 331 internal::Interval exclude_iterval = std::make_pair( 332 last_optimal - kExcludeIntervalLengthFrames / 2, 333 last_optimal + kExcludeIntervalLengthFrames / 2); 334 335 // |optimal_index| is in frames and it is relative to the beginning of the 336 // |search_block_|. 337 optimal_index = internal::OptimalIndex( 338 search_block_.get(), target_block_.get(), exclude_iterval); 339 340 // Translate |index| w.r.t. the beginning of |audio_buffer_| and extract the 341 // optimal block. 342 optimal_index += search_block_index_; 343 PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get()); 344 345 // Make a transition from target block to the optimal block if different. 346 // Target block has the best continuation to the current output. 347 // Optimal block is the most similar block to the target, however, it might 348 // introduce some discontinuity when over-lap-added. Therefore, we combine 349 // them for a smoother transition. The length of transition window is twice 350 // as that of the optimal-block which makes it like a weighting function 351 // where target-block has higher weight close to zero (weight of 1 at index 352 // 0) and lower weight close the end. 353 for (int k = 0; k < channels_; ++k) { 354 float* ch_opt = optimal_block_->channel(k); 355 const float* const ch_target = target_block_->channel(k); 356 for (int n = 0; n < ola_window_size_; ++n) { 357 ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] * 358 transition_window_[ola_window_size_ + n]; 359 } 360 } 361 } 362 363 // Next target is one hop ahead of the current optimal. 364 target_block_index_ = optimal_index + ola_hop_size_; 365} 366 367void AudioRendererAlgorithm::PeekAudioWithZeroPrepend( 368 int read_offset_frames, AudioBus* dest) { 369 CHECK_LE(read_offset_frames + dest->frames(), audio_buffer_.frames()); 370 371 int write_offset = 0; 372 int num_frames_to_read = dest->frames(); 373 if (read_offset_frames < 0) { 374 int num_zero_frames_appended = std::min(-read_offset_frames, 375 num_frames_to_read); 376 read_offset_frames = 0; 377 num_frames_to_read -= num_zero_frames_appended; 378 write_offset = num_zero_frames_appended; 379 dest->ZeroFrames(num_zero_frames_appended); 380 } 381 audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames, 382 write_offset, dest); 383} 384 385} // namespace media 386