1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "media/filters/audio_renderer_algorithm.h"
6
7#include <algorithm>
8#include <cmath>
9
10#include "base/logging.h"
11#include "base/memory/scoped_ptr.h"
12#include "media/base/audio_buffer.h"
13#include "media/base/audio_bus.h"
14#include "media/base/limits.h"
15#include "media/filters/wsola_internals.h"
16
17namespace media {
18
19
20// Waveform Similarity Overlap-and-add (WSOLA).
21//
22// One WSOLA iteration
23//
24// 1) Extract |target_block_| as input frames at indices
25//    [|target_block_index_|, |target_block_index_| + |ola_window_size_|).
26//    Note that |target_block_| is the "natural" continuation of the output.
27//
28// 2) Extract |search_block_| as input frames at indices
29//    [|search_block_index_|,
30//     |search_block_index_| + |num_candidate_blocks_| + |ola_window_size_|).
31//
32// 3) Find a block within the |search_block_| that is most similar
33//    to |target_block_|. Let |optimal_index| be the index of such block and
34//    write it to |optimal_block_|.
35//
36// 4) Update:
37//    |optimal_block_| = |transition_window_| * |target_block_| +
38//    (1 - |transition_window_|) * |optimal_block_|.
39//
40// 5) Overlap-and-add |optimal_block_| to the |wsola_output_|.
41//
42// 6) Update:
43//    |target_block_| = |optimal_index| + |ola_window_size_| / 2.
44//    |output_index_| = |output_index_| + |ola_window_size_| / 2,
45//    |search_block_center_offset_| = |output_index_| * |playback_rate|, and
46//    |search_block_index_| = |search_block_center_offset_| -
47//        |search_block_center_offset_|.
48
49// Max/min supported playback rates for fast/slow audio. Audio outside of these
50// ranges are muted.
51// Audio at these speeds would sound better under a frequency domain algorithm.
52static const float kMinPlaybackRate = 0.5f;
53static const float kMaxPlaybackRate = 4.0f;
54
55// Overlap-and-add window size in milliseconds.
56static const int kOlaWindowSizeMs = 20;
57
58// Size of search interval in milliseconds. The search interval is
59// [-delta delta] around |output_index_| * |playback_rate|. So the search
60// interval is 2 * delta.
61static const int kWsolaSearchIntervalMs = 30;
62
63// The maximum size in seconds for the |audio_buffer_|. Arbitrarily determined.
64static const int kMaxCapacityInSeconds = 3;
65
66// The starting size in frames for |audio_buffer_|. Previous usage maintained a
67// queue of 16 AudioBuffers, each of 512 frames. This worked well, so we
68// maintain this number of frames.
69static const int kStartingBufferSizeInFrames = 16 * 512;
70
71COMPILE_ASSERT(kStartingBufferSizeInFrames <
72               (kMaxCapacityInSeconds * limits::kMinSampleRate),
73               max_capacity_smaller_than_starting_buffer_size);
74
75AudioRendererAlgorithm::AudioRendererAlgorithm()
76    : channels_(0),
77      samples_per_second_(0),
78      muted_partial_frame_(0),
79      capacity_(kStartingBufferSizeInFrames),
80      output_time_(0.0),
81      search_block_center_offset_(0),
82      search_block_index_(0),
83      num_candidate_blocks_(0),
84      target_block_index_(0),
85      ola_window_size_(0),
86      ola_hop_size_(0),
87      num_complete_frames_(0) {
88}
89
90AudioRendererAlgorithm::~AudioRendererAlgorithm() {}
91
92void AudioRendererAlgorithm::Initialize(const AudioParameters& params) {
93  CHECK(params.IsValid());
94
95  channels_ = params.channels();
96  samples_per_second_ = params.sample_rate();
97  num_candidate_blocks_ = (kWsolaSearchIntervalMs * samples_per_second_) / 1000;
98  ola_window_size_ = kOlaWindowSizeMs * samples_per_second_ / 1000;
99
100  // Make sure window size in an even number.
101  ola_window_size_ += ola_window_size_ & 1;
102  ola_hop_size_ = ola_window_size_ / 2;
103
104  // |num_candidate_blocks_| / 2 is the offset of the center of the search
105  // block to the center of the first (left most) candidate block. The offset
106  // of the center of a candidate block to its left most point is
107  // |ola_window_size_| / 2 - 1. Note that |ola_window_size_| is even and in
108  // our convention the center belongs to the left half, so we need to subtract
109  // one frame to get the correct offset.
110  //
111  //                             Search Block
112  //              <------------------------------------------->
113  //
114  //   |ola_window_size_| / 2 - 1
115  //              <----
116  //
117  //             |num_candidate_blocks_| / 2
118  //                   <----------------
119  //                                 center
120  //              X----X----------------X---------------X-----X
121  //              <---------->                     <---------->
122  //                Candidate      ...               Candidate
123  //                   1,          ...         |num_candidate_blocks_|
124  search_block_center_offset_ = num_candidate_blocks_ / 2 +
125      (ola_window_size_ / 2 - 1);
126
127  ola_window_.reset(new float[ola_window_size_]);
128  internal::GetSymmetricHanningWindow(ola_window_size_, ola_window_.get());
129
130  transition_window_.reset(new float[ola_window_size_ * 2]);
131  internal::GetSymmetricHanningWindow(2 * ola_window_size_,
132                                      transition_window_.get());
133
134  wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_);
135  wsola_output_->Zero();  // Initialize for overlap-and-add of the first block.
136
137  // Auxiliary containers.
138  optimal_block_ = AudioBus::Create(channels_, ola_window_size_);
139  search_block_ = AudioBus::Create(
140      channels_, num_candidate_blocks_ + (ola_window_size_ - 1));
141  target_block_ = AudioBus::Create(channels_, ola_window_size_);
142}
143
144int AudioRendererAlgorithm::FillBuffer(AudioBus* dest,
145                                       int requested_frames,
146                                       float playback_rate) {
147  if (playback_rate == 0)
148    return 0;
149
150  DCHECK_EQ(channels_, dest->channels());
151
152  // Optimize the muted case to issue a single clear instead of performing
153  // the full crossfade and clearing each crossfaded frame.
154  if (playback_rate < kMinPlaybackRate || playback_rate > kMaxPlaybackRate) {
155    int frames_to_render =
156        std::min(static_cast<int>(audio_buffer_.frames() / playback_rate),
157                 requested_frames);
158
159    // Compute accurate number of frames to actually skip in the source data.
160    // Includes the leftover partial frame from last request. However, we can
161    // only skip over complete frames, so a partial frame may remain for next
162    // time.
163    muted_partial_frame_ += frames_to_render * playback_rate;
164    int seek_frames = static_cast<int>(muted_partial_frame_);
165    dest->ZeroFrames(frames_to_render);
166    audio_buffer_.SeekFrames(seek_frames);
167
168    // Determine the partial frame that remains to be skipped for next call. If
169    // the user switches back to playing, it may be off time by this partial
170    // frame, which would be undetectable. If they subsequently switch to
171    // another playback rate that mutes, the code will attempt to line up the
172    // frames again.
173    muted_partial_frame_ -= seek_frames;
174    return frames_to_render;
175  }
176
177  int slower_step = ceil(ola_window_size_ * playback_rate);
178  int faster_step = ceil(ola_window_size_ / playback_rate);
179
180  // Optimize the most common |playback_rate| ~= 1 case to use a single copy
181  // instead of copying frame by frame.
182  if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) {
183    const int frames_to_copy =
184        std::min(audio_buffer_.frames(), requested_frames);
185    const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest);
186    DCHECK_EQ(frames_read, frames_to_copy);
187    return frames_read;
188  }
189
190  int rendered_frames = 0;
191  do {
192    rendered_frames += WriteCompletedFramesTo(
193        requested_frames - rendered_frames, rendered_frames, dest);
194  } while (rendered_frames < requested_frames &&
195           RunOneWsolaIteration(playback_rate));
196  return rendered_frames;
197}
198
199void AudioRendererAlgorithm::FlushBuffers() {
200  // Clear the queue of decoded packets (releasing the buffers).
201  audio_buffer_.Clear();
202  output_time_ = 0.0;
203  search_block_index_ = 0;
204  target_block_index_ = 0;
205  wsola_output_->Zero();
206  num_complete_frames_ = 0;
207
208  // Reset |capacity_| so growth triggered by underflows doesn't penalize
209  // seek time.
210  capacity_ = kStartingBufferSizeInFrames;
211}
212
213void AudioRendererAlgorithm::EnqueueBuffer(
214    const scoped_refptr<AudioBuffer>& buffer_in) {
215  DCHECK(!buffer_in->end_of_stream());
216  audio_buffer_.Append(buffer_in);
217}
218
219bool AudioRendererAlgorithm::IsQueueFull() {
220  return audio_buffer_.frames() >= capacity_;
221}
222
223void AudioRendererAlgorithm::IncreaseQueueCapacity() {
224  int max_capacity = kMaxCapacityInSeconds * samples_per_second_;
225  DCHECK_LE(capacity_, max_capacity);
226
227  capacity_ = std::min(2 * capacity_, max_capacity);
228}
229
230bool AudioRendererAlgorithm::CanPerformWsola() const {
231  const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1);
232  const int frames = audio_buffer_.frames();
233  return target_block_index_ + ola_window_size_ <= frames &&
234      search_block_index_ + search_block_size <= frames;
235}
236
237bool AudioRendererAlgorithm::RunOneWsolaIteration(float playback_rate) {
238  if (!CanPerformWsola())
239    return false;
240
241  GetOptimalBlock();
242
243  // Overlap-and-add.
244  for (int k = 0; k < channels_; ++k) {
245    const float* const ch_opt_frame = optimal_block_->channel(k);
246    float* ch_output = wsola_output_->channel(k) + num_complete_frames_;
247    for (int n = 0; n < ola_hop_size_; ++n) {
248      ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] +
249          ch_opt_frame[n] * ola_window_[n];
250    }
251
252    // Copy the second half to the output.
253    memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_],
254           sizeof(*ch_opt_frame) * ola_hop_size_);
255  }
256
257  num_complete_frames_ += ola_hop_size_;
258  UpdateOutputTime(playback_rate, ola_hop_size_);
259  RemoveOldInputFrames(playback_rate);
260  return true;
261}
262
263void AudioRendererAlgorithm::UpdateOutputTime(float playback_rate,
264                                              double time_change) {
265  output_time_ += time_change;
266  // Center of the search region, in frames.
267  const int search_block_center_index = static_cast<int>(
268      output_time_ * playback_rate + 0.5);
269  search_block_index_ = search_block_center_index - search_block_center_offset_;
270}
271
272void AudioRendererAlgorithm::RemoveOldInputFrames(float playback_rate) {
273  const int earliest_used_index = std::min(target_block_index_,
274                                           search_block_index_);
275  if (earliest_used_index <= 0)
276    return;  // Nothing to remove.
277
278  // Remove frames from input and adjust indices accordingly.
279  audio_buffer_.SeekFrames(earliest_used_index);
280  target_block_index_ -= earliest_used_index;
281
282  // Adjust output index.
283  double output_time_change = static_cast<double>(earliest_used_index) /
284      playback_rate;
285  CHECK_GE(output_time_, output_time_change);
286  UpdateOutputTime(playback_rate, -output_time_change);
287}
288
289int AudioRendererAlgorithm::WriteCompletedFramesTo(
290    int requested_frames, int dest_offset, AudioBus* dest) {
291  int rendered_frames = std::min(num_complete_frames_, requested_frames);
292
293  if (rendered_frames == 0)
294    return 0;  // There is nothing to read from |wsola_output_|, return.
295
296  wsola_output_->CopyPartialFramesTo(0, rendered_frames, dest_offset, dest);
297
298  // Remove the frames which are read.
299  int frames_to_move = wsola_output_->frames() - rendered_frames;
300  for (int k = 0; k < channels_; ++k) {
301    float* ch = wsola_output_->channel(k);
302    memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move);
303  }
304  num_complete_frames_ -= rendered_frames;
305  return rendered_frames;
306}
307
308bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const {
309  const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1);
310
311  return target_block_index_ >= search_block_index_ &&
312      target_block_index_ + ola_window_size_ <=
313      search_block_index_ + search_block_size;
314}
315
316void AudioRendererAlgorithm::GetOptimalBlock() {
317  int optimal_index = 0;
318
319  // An interval around last optimal block which is excluded from the search.
320  // This is to reduce the buzzy sound. The number 160 is rather arbitrary and
321  // derived heuristically.
322  const int kExcludeIntervalLengthFrames = 160;
323  if (TargetIsWithinSearchRegion()) {
324    optimal_index = target_block_index_;
325    PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get());
326  } else {
327    PeekAudioWithZeroPrepend(target_block_index_, target_block_.get());
328    PeekAudioWithZeroPrepend(search_block_index_, search_block_.get());
329    int last_optimal = target_block_index_ - ola_hop_size_ -
330        search_block_index_;
331    internal::Interval exclude_iterval = std::make_pair(
332        last_optimal - kExcludeIntervalLengthFrames / 2,
333        last_optimal + kExcludeIntervalLengthFrames / 2);
334
335    // |optimal_index| is in frames and it is relative to the beginning of the
336    // |search_block_|.
337    optimal_index = internal::OptimalIndex(
338        search_block_.get(), target_block_.get(), exclude_iterval);
339
340    // Translate |index| w.r.t. the beginning of |audio_buffer_| and extract the
341    // optimal block.
342    optimal_index += search_block_index_;
343    PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get());
344
345    // Make a transition from target block to the optimal block if different.
346    // Target block has the best continuation to the current output.
347    // Optimal block is the most similar block to the target, however, it might
348    // introduce some discontinuity when over-lap-added. Therefore, we combine
349    // them for a smoother transition. The length of transition window is twice
350    // as that of the optimal-block which makes it like a weighting function
351    // where target-block has higher weight close to zero (weight of 1 at index
352    // 0) and lower weight close the end.
353    for (int k = 0; k < channels_; ++k) {
354      float* ch_opt = optimal_block_->channel(k);
355      const float* const ch_target = target_block_->channel(k);
356      for (int n = 0; n < ola_window_size_; ++n) {
357        ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] *
358            transition_window_[ola_window_size_ + n];
359      }
360    }
361  }
362
363  // Next target is one hop ahead of the current optimal.
364  target_block_index_ = optimal_index + ola_hop_size_;
365}
366
367void AudioRendererAlgorithm::PeekAudioWithZeroPrepend(
368    int read_offset_frames, AudioBus* dest) {
369  CHECK_LE(read_offset_frames + dest->frames(), audio_buffer_.frames());
370
371  int write_offset = 0;
372  int num_frames_to_read = dest->frames();
373  if (read_offset_frames < 0) {
374    int num_zero_frames_appended = std::min(-read_offset_frames,
375                                            num_frames_to_read);
376    read_offset_frames = 0;
377    num_frames_to_read -= num_zero_frames_appended;
378    write_offset = num_zero_frames_appended;
379    dest->ZeroFrames(num_zero_frames_appended);
380  }
381  audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames,
382                           write_offset, dest);
383}
384
385}  // namespace media
386