1c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/* Copyright (c) 2014, Cisco Systems, INC 2c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Written by XiangMingZhu WeiZhou MinPeng YanWang 3c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 4c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Redistribution and use in source and binary forms, with or without 5c91ee5b5642fcc4969150f73d5f6848f88bf1638flim modification, are permitted provided that the following conditions 6c91ee5b5642fcc4969150f73d5f6848f88bf1638flim are met: 7c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 8c91ee5b5642fcc4969150f73d5f6848f88bf1638flim - Redistributions of source code must retain the above copyright 9c91ee5b5642fcc4969150f73d5f6848f88bf1638flim notice, this list of conditions and the following disclaimer. 10c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 11c91ee5b5642fcc4969150f73d5f6848f88bf1638flim - Redistributions in binary form must reproduce the above copyright 12c91ee5b5642fcc4969150f73d5f6848f88bf1638flim notice, this list of conditions and the following disclaimer in the 13c91ee5b5642fcc4969150f73d5f6848f88bf1638flim documentation and/or other materials provided with the distribution. 14c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 15c91ee5b5642fcc4969150f73d5f6848f88bf1638flim THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18c91ee5b5642fcc4969150f73d5f6848f88bf1638flim A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 19c91ee5b5642fcc4969150f73d5f6848f88bf1638flim OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20c91ee5b5642fcc4969150f73d5f6848f88bf1638flim EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21c91ee5b5642fcc4969150f73d5f6848f88bf1638flim PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22c91ee5b5642fcc4969150f73d5f6848f88bf1638flim PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25c91ee5b5642fcc4969150f73d5f6848f88bf1638flim SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26c91ee5b5642fcc4969150f73d5f6848f88bf1638flim*/ 27c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 28c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifdef HAVE_CONFIG_H 29c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "config.h" 30c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif 31c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 32c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include <xmmintrin.h> 33c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include <emmintrin.h> 34c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include <smmintrin.h> 35c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "main.h" 36c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "celt/x86/x86cpu.h" 37c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 38c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "stack_alloc.h" 39c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 40c91ee5b5642fcc4969150f73d5f6848f88bf1638flimtypedef struct { 41c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; 42c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 RandState[ DECISION_DELAY ]; 43c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Q_Q10[ DECISION_DELAY ]; 44c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Xq_Q14[ DECISION_DELAY ]; 45c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Pred_Q15[ DECISION_DELAY ]; 46c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Shape_Q14[ DECISION_DELAY ]; 47c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; 48c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 LF_AR_Q14; 49c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Seed; 50c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 SeedInit; 51c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 RD_Q10; 52c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} NSQ_del_dec_struct; 53c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 54c91ee5b5642fcc4969150f73d5f6848f88bf1638flimtypedef struct { 55c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Q_Q10; 56c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 RD_Q10; 57c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 xq_Q14; 58c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 LF_AR_Q14; 59c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 sLTP_shp_Q14; 60c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 LPC_exc_Q14; 61c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} NSQ_sample_struct; 62c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 63c91ee5b5642fcc4969150f73d5f6848f88bf1638flimtypedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; 64c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 65c91ee5b5642fcc4969150f73d5f6848f88bf1638flimstatic OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( 66c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const silk_encoder_state *psEncC, /* I Encoder State */ 67c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_nsq_state *NSQ, /* I/O NSQ state */ 68c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ 69c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 x_Q3[], /* I Input in Q3 */ 70c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ 71c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ 72c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ 73c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int subfr, /* I Subframe number */ 74c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int nStatesDelayedDecision, /* I Number of del dec states */ 75c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int LTP_scale_Q14, /* I LTP state scaling */ 76c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ 77c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ 78c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int signal_type, /* I Signal type */ 79c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int decisionDelay /* I Decision delay */ 80c91ee5b5642fcc4969150f73d5f6848f88bf1638flim); 81c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 82c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/******************************************/ 83c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/* Noise shape quantizer for one subframe */ 84c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/******************************************/ 85c91ee5b5642fcc4969150f73d5f6848f88bf1638flimstatic OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( 86c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_nsq_state *NSQ, /* I/O NSQ state */ 87c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ 88c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int signalType, /* I Signal type */ 89c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 x_Q10[], /* I */ 90c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int8 pulses[], /* O */ 91c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int16 xq[], /* O */ 92c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 sLTP_Q15[], /* I/O LTP filter state */ 93c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ 94c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 a_Q12[], /* I Short term prediction coefs */ 95c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 b_Q14[], /* I Long term prediction coefs */ 96c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ 97c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int lag, /* I Pitch lag */ 98c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 HarmShapeFIRPacked_Q14, /* I */ 99c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int Tilt_Q14, /* I Spectral tilt */ 100c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 LF_shp_Q14, /* I */ 101c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Gain_Q16, /* I */ 102c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int Lambda_Q10, /* I */ 103c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int offset_Q10, /* I */ 104c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int length, /* I Input length */ 105c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int subfr, /* I Subframe number */ 106c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int shapingLPCOrder, /* I Shaping LPC filter order */ 107c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int predictLPCOrder, /* I Prediction filter order */ 108c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int warping_Q16, /* I */ 109c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ 110c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ 111c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int decisionDelay /* I */ 112c91ee5b5642fcc4969150f73d5f6848f88bf1638flim); 113c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 114c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid silk_NSQ_del_dec_sse4_1( 115c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const silk_encoder_state *psEncC, /* I/O Encoder State */ 116c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_nsq_state *NSQ, /* I/O NSQ state */ 117c91ee5b5642fcc4969150f73d5f6848f88bf1638flim SideInfoIndices *psIndices, /* I/O Quantization Indices */ 118c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 x_Q3[], /* I Prefiltered input signal */ 119c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int8 pulses[], /* O Quantized pulse signal */ 120c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ 121c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ 122c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ 123c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ 124c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ 125c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ 126c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ 127c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ 128c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ 129c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int LTP_scale_Q14 /* I LTP state scaling */ 130c91ee5b5642fcc4969150f73d5f6848f88bf1638flim) 131c91ee5b5642fcc4969150f73d5f6848f88bf1638flim{ 132c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; 133c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int last_smple_idx, smpl_buf_idx, decisionDelay; 134c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; 135c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int16 *pxq; 136c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL( opus_int32, sLTP_Q15 ); 137c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL( opus_int16, sLTP ); 138c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 HarmShapeFIRPacked_Q14; 139c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int offset_Q10; 140c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 RDmin_Q10, Gain_Q10; 141c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL( opus_int32, x_sc_Q10 ); 142c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL( opus_int32, delayedGain_Q10 ); 143c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL( NSQ_del_dec_struct, psDelDec ); 144c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ_del_dec_struct *psDD; 145c91ee5b5642fcc4969150f73d5f6848f88bf1638flim SAVE_STACK; 146c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 147c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Set unvoiced lag to the previous one, overwrite later for voiced */ 148c91ee5b5642fcc4969150f73d5f6848f88bf1638flim lag = NSQ->lagPrev; 149c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 150c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( NSQ->prev_gain_Q16 != 0 ); 151c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 152c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Initialize delayed decision states */ 153c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct ); 154c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) ); 155c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) { 156c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD = &psDelDec[ k ]; 157c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Seed = ( k + psIndices->Seed ) & 3; 158c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->SeedInit = psDD->Seed; 159c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->RD_Q10 = 0; 160c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; 161c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; 162c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); 163c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); 164c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 165c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 166c91ee5b5642fcc4969150f73d5f6848f88bf1638flim offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; 167c91ee5b5642fcc4969150f73d5f6848f88bf1638flim smpl_buf_idx = 0; /* index of oldest samples */ 168c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 169c91ee5b5642fcc4969150f73d5f6848f88bf1638flim decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); 170c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 171c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* For voiced frames limit the decision delay to lower than the pitch lag */ 172c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psIndices->signalType == TYPE_VOICED ) { 173c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 0; k < psEncC->nb_subfr; k++ ) { 174c91ee5b5642fcc4969150f73d5f6848f88bf1638flim decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 ); 175c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 176c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else { 177c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( lag > 0 ) { 178c91ee5b5642fcc4969150f73d5f6848f88bf1638flim decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 ); 179c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 180c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 181c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 182c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psIndices->NLSFInterpCoef_Q2 == 4 ) { 183c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LSF_interpolation_flag = 0; 184c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else { 185c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LSF_interpolation_flag = 1; 186c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 187c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 188c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC( sLTP_Q15, 189c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); 190c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); 191c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); 192c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); 193c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Set up pointers to start of sub frame */ 194c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; 195c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; 196c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; 197c91ee5b5642fcc4969150f73d5f6848f88bf1638flim subfr = 0; 198c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 0; k < psEncC->nb_subfr; k++ ) { 199c91ee5b5642fcc4969150f73d5f6848f88bf1638flim A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; 200c91ee5b5642fcc4969150f73d5f6848f88bf1638flim B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; 201c91ee5b5642fcc4969150f73d5f6848f88bf1638flim AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; 202c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 203c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Noise shape parameters */ 204c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); 205c91ee5b5642fcc4969150f73d5f6848f88bf1638flim HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); 206c91ee5b5642fcc4969150f73d5f6848f88bf1638flim HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); 207c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 208c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->rewhite_flag = 0; 209c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psIndices->signalType == TYPE_VOICED ) { 210c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Voiced */ 211c91ee5b5642fcc4969150f73d5f6848f88bf1638flim lag = pitchL[ k ]; 212c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 213c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Re-whitening */ 214c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { 215c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( k == 2 ) { 216c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* RESET DELAYED DECISIONS */ 217c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Find winner */ 218c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_Q10 = psDelDec[ 0 ].RD_Q10; 219c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Winner_ind = 0; 220c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) { 221c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) { 222c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_Q10 = psDelDec[ i ].RD_Q10; 223c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Winner_ind = i; 224c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 225c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 226c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) { 227c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( i != Winner_ind ) { 228c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); 229c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( psDelDec[ i ].RD_Q10 >= 0 ); 230c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 231c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 232c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 233c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Copy final part of signals from winner state to output and long-term filter states */ 234c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD = &psDelDec[ Winner_ind ]; 235c91ee5b5642fcc4969150f73d5f6848f88bf1638flim last_smple_idx = smpl_buf_idx + decisionDelay; 236c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 0; i < decisionDelay; i++ ) { 237c91ee5b5642fcc4969150f73d5f6848f88bf1638flim last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; 238c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); 239c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( 240c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); 241c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; 242c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 243c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 244c91ee5b5642fcc4969150f73d5f6848f88bf1638flim subfr = 0; 245c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 246c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 247c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Rewhiten with new A coefs */ 248c91ee5b5642fcc4969150f73d5f6848f88bf1638flim start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; 249c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( start_idx > 0 ); 250c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 251c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], 252c91ee5b5642fcc4969150f73d5f6848f88bf1638flim A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch ); 253c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 254c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; 255c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->rewhite_flag = 1; 256c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 257c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 258c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 259c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, 260c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); 261c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 262c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, 263c91ee5b5642fcc4969150f73d5f6848f88bf1638flim delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], 264c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, 265c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); 266c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 267c91ee5b5642fcc4969150f73d5f6848f88bf1638flim x_Q3 += psEncC->subfr_length; 268c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pulses += psEncC->subfr_length; 269c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pxq += psEncC->subfr_length; 270c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 271c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 272c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Find winner */ 273c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_Q10 = psDelDec[ 0 ].RD_Q10; 274c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Winner_ind = 0; 275c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) { 276c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) { 277c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_Q10 = psDelDec[ k ].RD_Q10; 278c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Winner_ind = k; 279c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 280c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 281c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 282c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Copy final part of signals from winner state to output and long-term filter states */ 283c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD = &psDelDec[ Winner_ind ]; 284c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psIndices->Seed = psDD->SeedInit; 285c91ee5b5642fcc4969150f73d5f6848f88bf1638flim last_smple_idx = smpl_buf_idx + decisionDelay; 286c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); 287c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 0; i < decisionDelay; i++ ) { 288c91ee5b5642fcc4969150f73d5f6848f88bf1638flim last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; 289c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); 290c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( 291c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); 292c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; 293c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 294c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); 295c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) ); 296c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 297c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Update states */ 298c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; 299c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; 300c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 301c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Save quantized speech signal */ 302c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ 303c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); 304c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); 305c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RESTORE_STACK; 306c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} 307c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 308c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/******************************************/ 309c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/* Noise shape quantizer for one subframe */ 310c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/******************************************/ 311c91ee5b5642fcc4969150f73d5f6848f88bf1638flimstatic OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1( 312c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_nsq_state *NSQ, /* I/O NSQ state */ 313c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ 314c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int signalType, /* I Signal type */ 315c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 x_Q10[], /* I */ 316c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int8 pulses[], /* O */ 317c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int16 xq[], /* O */ 318c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 sLTP_Q15[], /* I/O LTP filter state */ 319c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ 320c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 a_Q12[], /* I Short term prediction coefs */ 321c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 b_Q14[], /* I Long term prediction coefs */ 322c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ 323c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int lag, /* I Pitch lag */ 324c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 HarmShapeFIRPacked_Q14, /* I */ 325c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int Tilt_Q14, /* I Spectral tilt */ 326c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 LF_shp_Q14, /* I */ 327c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Gain_Q16, /* I */ 328c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int Lambda_Q10, /* I */ 329c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int offset_Q10, /* I */ 330c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int length, /* I Input length */ 331c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int subfr, /* I Subframe number */ 332c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int shapingLPCOrder, /* I Shaping LPC filter order */ 333c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int predictLPCOrder, /* I Prediction filter order */ 334c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int warping_Q16, /* I */ 335c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ 336c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ 337c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int decisionDelay /* I */ 338c91ee5b5642fcc4969150f73d5f6848f88bf1638flim) 339c91ee5b5642fcc4969150f73d5f6848f88bf1638flim{ 340c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; 341c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 Winner_rand_state; 342c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; 343c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10; 344c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; 345c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; 346c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; 347c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL( NSQ_sample_pair, psSampleState ); 348c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ_del_dec_struct *psDD; 349c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ_sample_struct *psSS; 350c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 351c91ee5b5642fcc4969150f73d5f6848f88bf1638flim __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF; 352c91ee5b5642fcc4969150f73d5f6848f88bf1638flim __m128i b_Q12_0123, b_sr_Q12_0123; 353c91ee5b5642fcc4969150f73d5f6848f88bf1638flim SAVE_STACK; 354c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 355c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( nStatesDelayedDecision > 0 ); 356c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); 357c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 358c91ee5b5642fcc4969150f73d5f6848f88bf1638flim shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; 359c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; 360c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); 361c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 362c91ee5b5642fcc4969150f73d5f6848f88bf1638flim a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 ); 363c91ee5b5642fcc4969150f73d5f6848f88bf1638flim a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 ); 364c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 365c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( opus_likely( predictLPCOrder == 16 ) ) { 366c91ee5b5642fcc4969150f73d5f6848f88bf1638flim a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 ); 367c91ee5b5642fcc4969150f73d5f6848f88bf1638flim a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 ); 368c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 369c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 370c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( signalType == TYPE_VOICED ){ 371c91ee5b5642fcc4969150f73d5f6848f88bf1638flim b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 ); 372c91ee5b5642fcc4969150f73d5f6848f88bf1638flim b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ 373c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 374c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 0; i < length; i++ ) { 375c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Perform common calculations used in all states */ 376c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 377c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Long-term prediction */ 378c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( signalType == TYPE_VOICED ) { 379c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Unrolled loop */ 380c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ 381c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LTP_pred_Q14 = 2; 382c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 383c91ee5b5642fcc4969150f73d5f6848f88bf1638flim __m128i tmpa, tmpb, pred_lag_ptr_tmp; 384c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) ); 385c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B ); 386c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 ); 387c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_srli_si128( tmpa, 2 ); 388c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 389c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */ 390c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr_tmp = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_0123 ); 391c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr_tmp = _mm_srli_si128( pred_lag_ptr_tmp, 2 ); 392c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpa ); 393c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 394c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3, 2 ) );/* equal shift right 8 bytes */ 395c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr_tmp = _mm_add_epi32( pred_lag_ptr_tmp, tmpb ); 396c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LTP_pred_Q14 += _mm_cvtsi128_si32( pred_lag_ptr_tmp ); 397c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 398c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); 399c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ 400c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pred_lag_ptr++; 401c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 402c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else { 403c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LTP_pred_Q14 = 0; 404c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 405c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 406c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Long-term shaping */ 407c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( lag > 0 ) { 408c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Symmetric, packed FIR coefficients */ 409c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); 410c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); 411c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ 412c91ee5b5642fcc4969150f73d5f6848f88bf1638flim shp_lag_ptr++; 413c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else { 414c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_LTP_Q14 = 0; 415c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 416c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 417c91ee5b5642fcc4969150f73d5f6848f88bf1638flim __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp; 418c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 419c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 0; k < nStatesDelayedDecision; k++ ) { 420c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Delayed decision state */ 421c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD = &psDelDec[ k ]; 422c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 423c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Sample state */ 424c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS = psSampleState[ k ]; 425c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 426c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Generate dither */ 427c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Seed = silk_RAND( psDD->Seed ); 428c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 429c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Pointer used in short term prediction and shaping */ 430c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; 431c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Short-term prediction */ 432c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); 433c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ 434c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); 435c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 436c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_setzero_si128(); 437c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 438c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* step 1 */ 439c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */ 440c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); /* 0, -1, -2, -3 */ 441c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 ); /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */ 442c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 443c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_srli_epi64( tmpa, 16 ); 444c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, tmpa ); 445c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 446c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ 447c91ee5b5642fcc4969150f73d5f6848f88bf1638flim a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ 448c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /* 1*-1, 3*-3 */ 449c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); 450c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); 451c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 452c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* step 2 */ 453c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) ); 454c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); 455c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 ); 456c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_srli_epi64( tmpa, 16 ); 457c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, tmpa ); 458c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 459c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ 460c91ee5b5642fcc4969150f73d5f6848f88bf1638flim a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ 461c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); 462c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); 463c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); 464c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 465c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if ( opus_likely( predictLPCOrder == 16 ) ) 466c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 467c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* step 3 */ 468c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) ); 469c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); 470c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB ); 471c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_srli_epi64( tmpa, 16 ); 472c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, tmpa ); 473c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 474c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ 475c91ee5b5642fcc4969150f73d5f6848f88bf1638flim a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3, 2, 1 ) );/* equal shift right 4 bytes */ 476c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); 477c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); 478c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); 479c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 480c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* setp 4 */ 481c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) ); 482c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); 483c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF ); 484c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_srli_epi64( tmpa, 16 ); 485c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, tmpa ); 486c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 487c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ 488c91ee5b5642fcc4969150f73d5f6848f88bf1638flim a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */ 489c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); 490c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psLPC_Q14_tmp = _mm_srli_epi64( psLPC_Q14_tmp, 16 ); 491c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp ); 492c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 493c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* add at last */ 494c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* equal shift right 8 bytes*/ 495c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); 496c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, tmpa ); 497c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); 498c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 499c91ee5b5642fcc4969150f73d5f6848f88bf1638flim else 500c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 501c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* add at last */ 502c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpa = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); /* equal shift right 8 bytes*/ 503c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmpb = _mm_add_epi32( tmpb, tmpa ); 504c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LPC_pred_Q14 += _mm_cvtsi128_si32( tmpb ); 505c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 506c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); 507c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); 508c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 509c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 510c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ 511c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 512c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Noise shape feedback */ 513c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ 514c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Output of lowpass section */ 515c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); 516c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Output of allpass section */ 517c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); 518c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->sAR2_Q14[ 0 ] = tmp2; 519c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); 520c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); 521c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Loop over allpass sections */ 522c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( j = 2; j < shapingLPCOrder; j += 2 ) { 523c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Output of allpass section */ 524c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 ); 525c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->sAR2_Q14[ j - 1 ] = tmp1; 526c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); 527c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Output of allpass section */ 528c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 ); 529c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->sAR2_Q14[ j + 0 ] = tmp2; 530c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); 531c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 532c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; 533c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); 534c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 535c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ 536c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ 537c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ 538c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 539c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ 540c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ 541c91ee5b5642fcc4969150f73d5f6848f88bf1638flim n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ 542c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 543c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Input minus prediction plus noise feedback */ 544c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ 545c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ 546c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ 547c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ 548c91ee5b5642fcc4969150f73d5f6848f88bf1638flim tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ 549c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 550c91ee5b5642fcc4969150f73d5f6848f88bf1638flim r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ 551c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 552c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Flip sign depending on dither */ 553c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if ( psDD->Seed < 0 ) { 554c91ee5b5642fcc4969150f73d5f6848f88bf1638flim r_Q10 = -r_Q10; 555c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 556c91ee5b5642fcc4969150f73d5f6848f88bf1638flim r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); 557c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 558c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Find two quantization level candidates and measure their rate-distortion */ 559c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); 560c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); 561c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( q1_Q0 > 0 ) { 562c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); 563c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); 564c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q2_Q10 = silk_ADD32( q1_Q10, 1024 ); 565c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); 566c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); 567c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else if( q1_Q0 == 0 ) { 568c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q1_Q10 = offset_Q10; 569c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); 570c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); 571c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); 572c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else if( q1_Q0 == -1 ) { 573c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q2_Q10 = offset_Q10; 574c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); 575c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); 576c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); 577c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else { /* q1_Q0 < -1 */ 578c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); 579c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); 580c91ee5b5642fcc4969150f73d5f6848f88bf1638flim q2_Q10 = silk_ADD32( q1_Q10, 1024 ); 581c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); 582c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); 583c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 584c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); 585c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); 586c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); 587c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); 588c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 589c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( rd1_Q10 < rd2_Q10 ) { 590c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); 591c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); 592c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 0 ].Q_Q10 = q1_Q10; 593c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 1 ].Q_Q10 = q2_Q10; 594c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else { 595c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); 596c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); 597c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 0 ].Q_Q10 = q2_Q10; 598c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 1 ].Q_Q10 = q1_Q10; 599c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 600c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 601c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Update states for best quantization */ 602c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 603c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Quantized excitation */ 604c91ee5b5642fcc4969150f73d5f6848f88bf1638flim exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); 605c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if ( psDD->Seed < 0 ) { 606c91ee5b5642fcc4969150f73d5f6848f88bf1638flim exc_Q14 = -exc_Q14; 607c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 608c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 609c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Add predictions */ 610c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); 611c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); 612c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 613c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Update states */ 614c91ee5b5642fcc4969150f73d5f6848f88bf1638flim sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); 615c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); 616c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; 617c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; 618c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 0 ].xq_Q14 = xq_Q14; 619c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 620c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Update states for second best quantization */ 621c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 622c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Quantized excitation */ 623c91ee5b5642fcc4969150f73d5f6848f88bf1638flim exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); 624c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if ( psDD->Seed < 0 ) { 625c91ee5b5642fcc4969150f73d5f6848f88bf1638flim exc_Q14 = -exc_Q14; 626c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 627c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 628c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 629c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Add predictions */ 630c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); 631c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); 632c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 633c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Update states */ 634c91ee5b5642fcc4969150f73d5f6848f88bf1638flim sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); 635c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); 636c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; 637c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; 638c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS[ 1 ].xq_Q14 = xq_Q14; 639c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 640c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 641c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ 642c91ee5b5642fcc4969150f73d5f6848f88bf1638flim last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ 643c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 644c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Find winner */ 645c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; 646c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Winner_ind = 0; 647c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 1; k < nStatesDelayedDecision; k++ ) { 648c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { 649c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; 650c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Winner_ind = k; 651c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 652c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 653c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 654c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Increase RD values of expired states */ 655c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; 656c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 0; k < nStatesDelayedDecision; k++ ) { 657c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { 658c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); 659c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); 660c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); 661c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 662c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 663c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 664c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Find worst in first set and best in second set */ 665c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; 666c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; 667c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmax_ind = 0; 668c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_ind = 0; 669c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 1; k < nStatesDelayedDecision; k++ ) { 670c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* find worst in first set */ 671c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) { 672c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; 673c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmax_ind = k; 674c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 675c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* find best in second set */ 676c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { 677c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; 678c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RDmin_ind = k; 679c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 680c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 681c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 682c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Replace a state if best from second set outperforms worst in first set */ 683c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( RDmin_Q10 < RDmax_Q10 ) { 684c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, 685c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); 686c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); 687c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 688c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 689c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Write samples from winner to output and long-term filter states */ 690c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD = &psDelDec[ Winner_ind ]; 691c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( subfr > 0 || i >= decisionDelay ) { 692c91ee5b5642fcc4969150f73d5f6848f88bf1638flim pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); 693c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( 694c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); 695c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; 696c91ee5b5642fcc4969150f73d5f6848f88bf1638flim sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; 697c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 698c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_shp_buf_idx++; 699c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_buf_idx++; 700c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 701c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Update states */ 702c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 0; k < nStatesDelayedDecision; k++ ) { 703c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD = &psDelDec[ k ]; 704c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psSS = &psSampleState[ k ][ 0 ]; 705c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->LF_AR_Q14 = psSS->LF_AR_Q14; 706c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; 707c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; 708c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; 709c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); 710c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; 711c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); 712c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; 713c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->RD_Q10 = psSS->RD_Q10; 714c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 715c91ee5b5642fcc4969150f73d5f6848f88bf1638flim delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; 716c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 717c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Update LPC states */ 718c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 0; k < nStatesDelayedDecision; k++ ) { 719c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD = &psDelDec[ k ]; 720c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); 721c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 722c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RESTORE_STACK; 723c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} 724c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 725c91ee5b5642fcc4969150f73d5f6848f88bf1638flimstatic OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1( 726c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const silk_encoder_state *psEncC, /* I Encoder State */ 727c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_nsq_state *NSQ, /* I/O NSQ state */ 728c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ 729c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 x_Q3[], /* I Input in Q3 */ 730c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ 731c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ 732c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ 733c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int subfr, /* I Subframe number */ 734c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int nStatesDelayedDecision, /* I Number of del dec states */ 735c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int LTP_scale_Q14, /* I LTP state scaling */ 736c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ 737c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ 738c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int signal_type, /* I Signal type */ 739c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int decisionDelay /* I Decision delay */ 740c91ee5b5642fcc4969150f73d5f6848f88bf1638flim) 741c91ee5b5642fcc4969150f73d5f6848f88bf1638flim{ 742c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int i, k, lag; 743c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; 744c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ_del_dec_struct *psDD; 745c91ee5b5642fcc4969150f73d5f6848f88bf1638flim __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1; 746c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 747c91ee5b5642fcc4969150f73d5f6848f88bf1638flim lag = pitchL[ subfr ]; 748c91ee5b5642fcc4969150f73d5f6848f88bf1638flim inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); 749c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 750c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( inv_gain_Q31 != 0 ); 751c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 752c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Calculate gain adjustment factor */ 753c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { 754c91ee5b5642fcc4969150f73d5f6848f88bf1638flim gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); 755c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } else { 756c91ee5b5642fcc4969150f73d5f6848f88bf1638flim gain_adj_Q16 = (opus_int32)1 << 16; 757c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 758c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 759c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Scale input */ 760c91ee5b5642fcc4969150f73d5f6848f88bf1638flim inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); 761c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 762c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* prepare inv_gain_Q23 in packed 4 32-bits */ 763c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23); 764c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 765c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) { 766c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) ); 767c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* equal shift right 4 bytes*/ 768c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); 769c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 770c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 ); 771c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 ); 772c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 773c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 ); 774c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 ); 775c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 776c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC ); 777c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 778c91ee5b5642fcc4969150f73d5f6848f88bf1638flim _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 ); 779c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 780c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 781c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( ; i < psEncC->subfr_length; i++ ) { 782c91ee5b5642fcc4969150f73d5f6848f88bf1638flim x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); 783c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 784c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 785c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Save inverse gain */ 786c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; 787c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 788c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ 789c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( NSQ->rewhite_flag ) { 790c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( subfr == 0 ) { 791c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Do LTP downscaling */ 792c91ee5b5642fcc4969150f73d5f6848f88bf1638flim inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 ); 793c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 794c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { 795c91ee5b5642fcc4969150f73d5f6848f88bf1638flim silk_assert( i < MAX_FRAME_LENGTH ); 796c91ee5b5642fcc4969150f73d5f6848f88bf1638flim sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); 797c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 798c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 799c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 800c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Adjust for changing gain */ 801c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( gain_adj_Q16 != (opus_int32)1 << 16 ) { 802c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Scale long-term shaping state */ 803c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 804c91ee5b5642fcc4969150f73d5f6848f88bf1638flim __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1; 805c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 806c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* prepare gain_adj_Q16 in packed 4 32-bits */ 807c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 ); 808c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 809c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 ) 810c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 811c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) ); 812c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* equal shift right 4 bytes*/ 813c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) ); 814c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 815c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 ); 816c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 ); 817c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 818c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 ); 819c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 ); 820c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 821c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC ); 822c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 823c91ee5b5642fcc4969150f73d5f6848f88bf1638flim _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 ); 824c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 825c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 826c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) { 827c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); 828c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 829c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 830c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Scale long-term prediction state */ 831c91ee5b5642fcc4969150f73d5f6848f88bf1638flim if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { 832c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { 833c91ee5b5642fcc4969150f73d5f6848f88bf1638flim sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); 834c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 835c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 836c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 837c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( k = 0; k < nStatesDelayedDecision; k++ ) { 838c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD = &psDelDec[ k ]; 839c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 840c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Scale scalar states */ 841c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); 842c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 843c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Scale short-term prediction and shaping states */ 844c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { 845c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] ); 846c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 847c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { 848c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] ); 849c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 850c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for( i = 0; i < DECISION_DELAY; i++ ) { 851c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[ i ] ); 852c91ee5b5642fcc4969150f73d5f6848f88bf1638flim psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] ); 853c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 854c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 855c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 856c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 857c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} 858