1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "sola_time_scaler.h"
18
19#include <math.h>
20#include <hlogging.h>
21#include <algorithm>
22
23#include "ring_buffer.h"
24
25#define FLAGS_sola_ring_buffer 2.0
26#define FLAGS_sola_enable_correlation true
27
28
29namespace video_editing {
30
31// Returns a cross-correlation score for the specified buffers.
32int SolaAnalyzer::Correlate(const float* buffer1, const float* buffer2,
33                            int num_frames) {
34  CHECK(initialized_);
35
36  int score = 0;
37  num_frames *= num_channels_;
38  while (num_frames-- > 0) {
39    // Increment the score if the sign bits match.
40    score += ((bit_cast<int32>(*buffer1++) ^ bit_cast<int32>(*buffer2++)) >= 0)
41              ? 1 : 0;
42  }
43  return score;
44}
45
46// Trivial SolaAnalyzer class to bypass correlation.
47class SolaBypassAnalyzer : public SolaAnalyzer {
48 public:
49  SolaBypassAnalyzer() { }
50  virtual int Correlate(const float*, const float*, int num_frames) {
51    return num_frames * num_channels_;
52  }
53};
54
55
56// Default constructor.
57SolaTimeScaler::SolaTimeScaler()
58    : input_buffer_(NULL), output_buffer_(NULL), analyzer_(NULL) {
59  sample_rate_ = 0;
60  num_channels_ = 0;
61
62  draining_ = false;
63  initialized_ = false;
64}
65
66SolaTimeScaler::~SolaTimeScaler() {
67  delete input_buffer_;
68  delete output_buffer_;
69  delete analyzer_;
70}
71
72// Injects a SolaAnalyzer instance for analyzing signal frames.
73void SolaTimeScaler::set_analyzer(SolaAnalyzer* analyzer) {
74  MutexLock lock(&mutex_);  // lock out processing while updating
75  delete analyzer_;
76  analyzer_ = analyzer;
77}
78
79// Initializes a SOLA timescaler.
80void SolaTimeScaler::Init(double sample_rate,
81                          int num_channels,
82                          double initial_speed,
83                          double window_duration,
84                          double overlap_duration) {
85  MutexLock lock(&mutex_);  // lock out processing while updating
86
87  sample_rate_ = sample_rate;
88  num_channels_ = num_channels;
89  speed_ = initial_speed;
90  window_duration_ = window_duration;
91  overlap_duration_ = overlap_duration;
92
93  initialized_ = true;
94  GenerateParameters();
95  Reset();
96}
97
98// Adjusts the rate scaling factor.
99void SolaTimeScaler::set_speed(double speed) {
100  MutexLock lock(&mutex_);  // lock out processing while updating
101
102  speed_ = speed;
103  GenerateParameters();
104}
105
106// Generates processing parameters from the current settings.
107void SolaTimeScaler::GenerateParameters() {
108  if (speed_ < 0.1) {
109    LOGE("Requested speed %fx limited to 0.1x", speed_);
110    speed_ = 0.1;
111  } else if (speed_ > 8.0) {
112    LOGE("Requested speed %fx limited to 8.0x", speed_);
113    speed_ = 8.0;
114  }
115
116  ratio_ = 1.0 / speed_;
117
118  num_window_frames_ = nearbyint(sample_rate_ * window_duration_);
119
120  // Limit the overlap to half the window size, and round up to an odd number.
121  // Half of overlap window (rounded down) is also a useful number.
122  overlap_duration_ = min(overlap_duration_, window_duration_ / 2.0);
123  num_overlap_frames_ = nearbyint(sample_rate_ * overlap_duration_);
124  num_overlap_frames_ |= 1;
125  half_overlap_frames_ = num_overlap_frames_ >> 1;
126
127  if (speed_ >= 1.) {
128    // For compression (speed up), adjacent input windows overlap in the output.
129    input_window_offset_ = num_window_frames_;
130    target_merge_offset_ = nearbyint(num_window_frames_ * ratio_);
131  } else {
132    // For expansion (slow down), each input window start point overlaps the
133    // previous, and they are placed adjacently in the output
134    // (+/- half the overlap size).
135    input_window_offset_ = nearbyint(num_window_frames_ * speed_);
136    target_merge_offset_ = num_window_frames_;
137  }
138
139  // Make sure we copy enough extra data to be able to perform a
140  // frame correlation over the range of target merge point +/- half overlap,
141  // even when the previous merge point was adjusted backwards a half overlap.
142  max_frames_to_merge_ = max(num_window_frames_,
143      target_merge_offset_ + (2 * num_overlap_frames_));
144  min_output_to_hold_=
145      max_frames_to_merge_ + num_overlap_frames_ - target_merge_offset_;
146}
147
148// The input buffer has one writer and reader.
149// The output buffer has one reader/updater, and one reader/consumer.
150static const int kInputReader = 0;
151static const int kOutputAnalysis = 0;
152static const int kOutputConsumer = 1;
153
154void SolaTimeScaler::Reset() {
155  CHECK(initialized_);
156  double duration = max(FLAGS_sola_ring_buffer, 20. * window_duration_);
157  draining_ = false;
158
159  delete input_buffer_;
160  input_buffer_ = new RingBuffer();
161  input_buffer_->Init(static_cast<int>
162      (sample_rate_ * duration), num_channels_, 1);
163
164  delete output_buffer_;
165  output_buffer_ = new RingBuffer();
166  output_buffer_->Init(static_cast<int>
167      (sample_rate_ * ratio_ * duration), num_channels_, 2);
168
169  if (analyzer_ == NULL) {
170    if (FLAGS_sola_enable_correlation) {
171      analyzer_ = new SolaAnalyzer();
172    } else {
173      analyzer_ = new SolaBypassAnalyzer();
174    }
175  }
176  analyzer_->Init(sample_rate_, num_channels_);
177}
178
179// Returns the number of frames that the input buffer can accept.
180int SolaTimeScaler::input_limit() const {
181  CHECK(initialized_);
182  return input_buffer_->overhead();
183}
184
185// Returns the number of available output frames.
186int SolaTimeScaler::available() {
187  CHECK(initialized_);
188
189  int available = output_buffer_->available(kOutputConsumer);
190  if (available > min_output_to_hold_) {
191    available -= min_output_to_hold_;
192  } else if (draining_) {
193    Process();
194    available = output_buffer_->available(kOutputConsumer);
195    if (available > min_output_to_hold_) {
196      available -= min_output_to_hold_;
197    }
198  } else {
199    available = 0;
200  }
201  return available;
202}
203
204void SolaTimeScaler::Drain() {
205  CHECK(initialized_);
206
207  draining_ = true;
208}
209
210
211// Feeds audio to the timescaler, and processes as much data as possible.
212int SolaTimeScaler::InjectSamples(float* buffer, int num_frames) {
213  CHECK(initialized_);
214
215  // Do not write more frames than the buffer can accept.
216  num_frames = min(input_limit(), num_frames);
217  if (!num_frames) {
218    return 0;
219  }
220
221  // Copy samples to the input buffer and then process whatever can be consumed.
222  input_buffer_->Write(buffer, num_frames);
223  Process();
224  return num_frames;
225}
226
227// Retrieves audio data from the timescaler.
228int SolaTimeScaler::RetrieveSamples(float* buffer, int num_frames) {
229  CHECK(initialized_);
230
231  // Do not read more frames than available.
232  num_frames = min(available(), num_frames);
233  if (!num_frames) {
234    return 0;
235  }
236
237  output_buffer_->Copy(kOutputConsumer, buffer, num_frames);
238  output_buffer_->Seek(kOutputConsumer,
239                       output_buffer_->Tell(kOutputConsumer) + num_frames);
240
241  return num_frames;
242}
243
244// Munges input samples to produce output.
245bool SolaTimeScaler::Process() {
246  CHECK(initialized_);
247  bool generated_data = false;
248
249  // We can only process data if there is sufficient input available
250  // (or we are draining the latency), and there is sufficient room
251  // for output to be merged.
252  while (((input_buffer_->available(kInputReader) > max_frames_to_merge_) ||
253         draining_) && (output_buffer_->overhead() >= max_frames_to_merge_)) {
254    MutexLock lock(&mutex_);  // lock out updates while processing each window
255
256    // Determine the number of samples to merge into the output.
257    int input_count =
258        min(input_buffer_->available(kInputReader), max_frames_to_merge_);
259    if (input_count == 0) {
260      break;
261    }
262    // The input reader always points to the next window to process.
263    float* input_pointer = input_buffer_->GetPointer(kInputReader, input_count);
264
265    // The analysis reader always points to the ideal target merge point,
266    // minus half an overlap window (ie, the starting point for correlation).
267    // That means the available data from that point equals the number
268    // of samples that must be cross-faded.
269    int output_merge_cnt = output_buffer_->available(kOutputAnalysis);
270    float* output_pointer =
271        output_buffer_->GetPointer(kOutputAnalysis, output_merge_cnt);
272
273    // If there is not enough data to do a proper correlation,
274    // just merge at the ideal target point. Otherwise,
275    // find the best correlation score, working from the center out.
276    int merge_offset = min(output_merge_cnt, half_overlap_frames_);
277
278    if ((output_merge_cnt >= (2 * num_overlap_frames_)) &&
279        (input_count >= num_overlap_frames_)) {
280      int best_offset = merge_offset;
281      int best_score = 0;
282      int score;
283      for (int i = 0; i <= half_overlap_frames_; ++i) {
284        score = analyzer_->Correlate(input_pointer,
285            output_pointer + ((merge_offset + i) * num_channels_),
286            num_overlap_frames_);
287        if (score > best_score) {
288          best_score = score;
289          best_offset = merge_offset + i;
290          if (score == (num_overlap_frames_ * num_channels_)) {
291            break;  // It doesn't get better than perfect.
292          }
293        }
294        if (i > 0) {
295          score = analyzer_->Correlate(input_pointer,
296              output_pointer + ((merge_offset - i) * num_channels_),
297              num_overlap_frames_);
298          if (score > best_score) {
299            best_score = score;
300            best_offset = merge_offset - i;
301            if (score == (num_overlap_frames_ * num_channels_)) {
302              break;  // It doesn't get better than perfect.
303            }
304          }
305        }
306      }
307      merge_offset = best_offset;
308    } else if ((output_merge_cnt > 0) && !draining_) {
309      LOGE("no correlation performed");
310    }
311
312    // Crossfade the overlap between input and output, and then
313    // copy in the remaining input.
314    int crossfade_count = max(0, (output_merge_cnt - merge_offset));
315    crossfade_count = min(crossfade_count, input_count);
316    int remaining_count = input_count - crossfade_count;
317
318    float* merge_pointer = output_pointer + (merge_offset * num_channels_);
319    float flt_count = static_cast<float>(crossfade_count);
320    for (int i = 0; i < crossfade_count; ++i) {
321      // Linear cross-fade, for now.
322      float input_scale = static_cast<float>(i) / flt_count;
323      float output_scale = 1. - input_scale;
324      for (int j = 0; j < num_channels_; ++j) {
325        *merge_pointer = (*merge_pointer * output_scale) +
326                         (*input_pointer++ * input_scale);
327        ++merge_pointer;
328      }
329    }
330    // Copy the merged buffer back into the output, if necessary, and
331    // append the rest of the window.
332    output_buffer_->MergeBack(kOutputAnalysis,
333                              output_pointer, output_merge_cnt);
334    output_buffer_->Write(input_pointer, remaining_count);
335
336    // Advance the output analysis pointer to the next target merge point,
337    // minus half an overlap window.  The target merge point is always
338    // calculated as a delta from the previous ideal target, not the actual
339    // target, to avoid drift.
340    int output_advance = target_merge_offset_;
341    if (output_merge_cnt < half_overlap_frames_) {
342      // On the first window, back up the pointer for the next correlation.
343      // Thereafter, that compensation is preserved.
344      output_advance -= half_overlap_frames_;
345    }
346
347    // Don't advance beyond the available data, when finishing up.
348    if (draining_) {
349      output_advance =
350          min(output_advance, output_buffer_->available(kOutputAnalysis));
351    }
352    output_buffer_->Seek(kOutputAnalysis,
353        output_buffer_->Tell(kOutputAnalysis) + output_advance);
354
355    // Advance the input pointer beyond the frames that are no longer needed.
356    input_buffer_->Seek(kInputReader, input_buffer_->Tell(kInputReader) +
357                        min(input_count, input_window_offset_));
358
359    if ((crossfade_count + remaining_count) > 0) {
360      generated_data = true;
361    }
362  }  // while (more to process)
363  return generated_data;
364}
365
366}  // namespace video_editing
367