accelerate.cc revision dce40cf804019a9898b6ab8d8262466b697c56e0
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
1048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
1148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos#include "webrtc/modules/audio_coding/neteq/accelerate.h"
1248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
1348087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
1448087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
1548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulosnamespace webrtc {
1648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
1748087f5f0eb08759ee763f98daf3b34becb74559Pyry HaulosAccelerate::ReturnCodes Accelerate::Process(const int16_t* input,
1848087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos                                            size_t input_length,
1984322c9402f810da3cd80b52e9f9ef72150a9004Alexander Galazin                                            bool fast_accelerate,
2048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos                                            AudioMultiVector* output,
2148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos                                            size_t* length_change_samples) {
2284322c9402f810da3cd80b52e9f9ef72150a9004Alexander Galazin  // Input length must be (almost) 30 ms.
2348087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  static const size_t k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
2448087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  if (num_channels_ == 0 ||
2548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos      input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_) {
2648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // Length of input data too short to do accelerate. Simply move all data
2748087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // from input to output.
2884322c9402f810da3cd80b52e9f9ef72150a9004Alexander Galazin    output->PushBackInterleaved(input, input_length);
2948087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    return kError;
3048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  }
3148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  return TimeStretch::Process(input, input_length, fast_accelerate, output,
3248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos                              length_change_samples);
3348087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos}
3448087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
3548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulosvoid Accelerate::SetParametersForPassiveSpeech(size_t /*len*/,
3648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos                                               int16_t* best_correlation,
3748087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos                                               size_t* /*peak_index*/) const {
3848087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  // When the signal does not contain any active speech, the correlation does
3948087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  // not matter. Simply set it to zero.
4048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  *best_correlation = 0;
4148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos}
4248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
4348087f5f0eb08759ee763f98daf3b34becb74559Pyry HaulosAccelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch(
4448087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    const int16_t* input,
4548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    size_t input_length,
4648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    size_t peak_index,
4748087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    int16_t best_correlation,
4848087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    bool active_speech,
4948087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    bool fast_mode,
5048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    AudioMultiVector* output) const {
5148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  // Check for strong correlation or passive speech.
5248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  // Use 8192 (0.5 in Q14) in fast mode.
5348087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  const int correlation_threshold = fast_mode ? 8192 : kCorrelationThreshold;
5448087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  if ((best_correlation > correlation_threshold) || !active_speech) {
5548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // Do accelerate operation by overlap add.
5648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
5748087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // Pre-calculate common multiplication with |fs_mult_|.
5848087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // 120 corresponds to 15 ms.
5948087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    size_t fs_mult_120 = fs_mult_ * 120;
6048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
6148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    if (fast_mode) {
6248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos      // Fit as many multiples of |peak_index| as possible in fs_mult_120.
6348087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos      // TODO(henrik.lundin) Consider finding multiple correlation peaks and
6448087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos      // pick the one with the longest correlation lag in this case.
6548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos      peak_index = (fs_mult_120 / peak_index) * peak_index;
6648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    }
6748087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
6848087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    assert(fs_mult_120 >= peak_index);  // Should be handled in Process().
6948087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // Copy first part; 0 to 15 ms.
7048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    output->PushBackInterleaved(input, fs_mult_120 * num_channels_);
7148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // Copy the |peak_index| starting at 15 ms to |temp_vector|.
7248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    AudioMultiVector temp_vector(num_channels_);
7348087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    temp_vector.PushBackInterleaved(&input[fs_mult_120 * num_channels_],
7448087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos                                    peak_index * num_channels_);
7548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // Cross-fade |temp_vector| onto the end of |output|.
7648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    output->CrossFade(temp_vector, peak_index);
7748087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // Copy the last unmodified part, 15 ms + pitch period until the end.
7848087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    output->PushBackInterleaved(
7948087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos        &input[(fs_mult_120 + peak_index) * num_channels_],
8048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos        input_length - (fs_mult_120 + peak_index) * num_channels_);
8148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
8248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    if (active_speech) {
8348087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos      return kSuccess;
8448087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    } else {
8548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos      return kSuccessLowEnergy;
8648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    }
8748087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  } else {
8848087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    // Accelerate not allowed. Simply move all data from decoded to outData.
8948087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    output->PushBackInterleaved(input, input_length);
9048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    return kNoStretch;
9148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  }
9248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos}
9348087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
9448087f5f0eb08759ee763f98daf3b34becb74559Pyry HaulosAccelerate* AccelerateFactory::Create(
9548087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    int sample_rate_hz,
9648087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    size_t num_channels,
9748087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos    const BackgroundNoise& background_noise) const {
9848087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos  return new Accelerate(sample_rate_hz, num_channels, background_noise);
9948087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos}
10048087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos
10148087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos}  // namespace webrtc
10248087f5f0eb08759ee763f98daf3b34becb74559Pyry Haulos