/* Copyright (c) 2014, Cisco Systems, INC
   Written by XiangMingZhu WeiZhou MinPeng YanWang

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
27c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
28c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifdef HAVE_CONFIG_H
29c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "config.h"
30c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif
31c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
32c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include <xmmintrin.h>
33c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include <emmintrin.h>
34c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include <smmintrin.h>
35c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "main.h"
36c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "celt/x86/x86cpu.h"
37c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
38c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "stack_alloc.h"
39c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
40c91ee5b5642fcc4969150f73d5f6848f88bf1638flimtypedef struct {
41c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
42c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 RandState[ DECISION_DELAY ];
43c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 Q_Q10[     DECISION_DELAY ];
44c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 Xq_Q14[    DECISION_DELAY ];
45c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 Pred_Q15[  DECISION_DELAY ];
46c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 Shape_Q14[ DECISION_DELAY ];
47c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
48c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 LF_AR_Q14;
49c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 Seed;
50c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 SeedInit;
51c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 RD_Q10;
52c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} NSQ_del_dec_struct;
53c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
54c91ee5b5642fcc4969150f73d5f6848f88bf1638flimtypedef struct {
55c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 Q_Q10;
56c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 RD_Q10;
57c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 xq_Q14;
58c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 LF_AR_Q14;
59c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 sLTP_shp_Q14;
60c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32 LPC_exc_Q14;
61c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} NSQ_sample_struct;
62c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
63c91ee5b5642fcc4969150f73d5f6848f88bf1638flimtypedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];
64c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
65c91ee5b5642fcc4969150f73d5f6848f88bf1638flimstatic OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
66c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
67c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
68c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
69c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
70c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
71c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
72c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
73c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            subfr,                      /* I    Subframe number                     */
74c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
75c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
76c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
77c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
78c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int      signal_type,                /* I    Signal type                         */
79c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int      decisionDelay               /* I    Decision delay                      */
80c91ee5b5642fcc4969150f73d5f6848f88bf1638flim);
81c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
82c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/******************************************/
83c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/* Noise shape quantizer for one subframe */
84c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/******************************************/
85c91ee5b5642fcc4969150f73d5f6848f88bf1638flimstatic OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
86c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
87c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
88c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            signalType,             /* I    Signal type                         */
89c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32    x_Q10[],                /* I                                        */
90c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int8           pulses[],               /* O                                        */
91c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int16          xq[],                   /* O                                        */
92c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
93c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
94c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
95c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
96c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
97c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            lag,                    /* I    Pitch lag                           */
98c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
99c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
100c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          LF_shp_Q14,             /* I                                        */
101c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          Gain_Q16,               /* I                                        */
102c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            Lambda_Q10,             /* I                                        */
103c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            offset_Q10,             /* I                                        */
104c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            length,                 /* I    Input length                        */
105c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            subfr,                  /* I    Subframe number                     */
106c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
107c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
108c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            warping_Q16,            /* I                                        */
109c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
110c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
111c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            decisionDelay           /* I                                        */
112c91ee5b5642fcc4969150f73d5f6848f88bf1638flim);
113c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
114c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid silk_NSQ_del_dec_sse4_1(
115c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
116c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
117c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
118c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
119c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
120c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
121c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
122c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
123c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
124c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
125c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
126c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
127c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
128c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
129c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
130c91ee5b5642fcc4969150f73d5f6848f88bf1638flim)
131c91ee5b5642fcc4969150f73d5f6848f88bf1638flim{
132c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
133c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            last_smple_idx, smpl_buf_idx, decisionDelay;
134c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
135c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int16          *pxq;
136c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    VARDECL( opus_int32, sLTP_Q15 );
137c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    VARDECL( opus_int16, sLTP );
138c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          HarmShapeFIRPacked_Q14;
139c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            offset_Q10;
140c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          RDmin_Q10, Gain_Q10;
141c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    VARDECL( opus_int32, x_sc_Q10 );
142c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    VARDECL( opus_int32, delayedGain_Q10 );
143c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    VARDECL( NSQ_del_dec_struct, psDelDec );
144c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ_del_dec_struct  *psDD;
145c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    SAVE_STACK;
146c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
147c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Set unvoiced lag to the previous one, overwrite later for voiced */
148c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    lag = NSQ->lagPrev;
149c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
150c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_assert( NSQ->prev_gain_Q16 != 0 );
151c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
152c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Initialize delayed decision states */
153c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );
154c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
155c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
156c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        psDD                 = &psDelDec[ k ];
157c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        psDD->Seed           = ( k + psIndices->Seed ) & 3;
158c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        psDD->SeedInit       = psDD->Seed;
159c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        psDD->RD_Q10         = 0;
160c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        psDD->LF_AR_Q14      = NSQ->sLF_AR_shp_Q14;
161c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
162c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
163c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
164c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
165c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
166c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    offset_Q10   = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
167c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    smpl_buf_idx = 0; /* index of oldest samples */
168c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
169c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length );
170c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
171c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* For voiced frames limit the decision delay to lower than the pitch lag */
172c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    if( psIndices->signalType == TYPE_VOICED ) {
173c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        for( k = 0; k < psEncC->nb_subfr; k++ ) {
174c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 );
175c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
176c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    } else {
177c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        if( lag > 0 ) {
178c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 );
179c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
180c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
181c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
182c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
183c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        LSF_interpolation_flag = 0;
184c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    } else {
185c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        LSF_interpolation_flag = 1;
186c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
187c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
188c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    ALLOC( sLTP_Q15,
189c91ee5b5642fcc4969150f73d5f6848f88bf1638flim           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
190c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
191c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
192c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
193c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Set up pointers to start of sub frame */
194c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
195c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
196c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
197c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    subfr = 0;
198c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    for( k = 0; k < psEncC->nb_subfr; k++ ) {
199c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        A_Q12      = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
200c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER           ];
201c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
202c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
203c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Noise shape parameters */
204c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
205c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
206c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
207c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
208c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        NSQ->rewhite_flag = 0;
209c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        if( psIndices->signalType == TYPE_VOICED ) {
210c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* Voiced */
211c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            lag = pitchL[ k ];
212c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
213c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* Re-whitening */
214c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
215c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                if( k == 2 ) {
216c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* RESET DELAYED DECISIONS */
217c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* Find winner */
218c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
219c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    Winner_ind = 0;
220c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
221c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                        if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
222c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                            RDmin_Q10 = psDelDec[ i ].RD_Q10;
223c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                            Winner_ind = i;
224c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                        }
225c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    }
226c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
227c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                        if( i != Winner_ind ) {
228c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                            psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 );
229c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                            silk_assert( psDelDec[ i ].RD_Q10 >= 0 );
230c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                        }
231c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    }
232c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
233c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* Copy final part of signals from winner state to output and long-term filter states */
234c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psDD = &psDelDec[ Winner_ind ];
235c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    last_smple_idx = smpl_buf_idx + decisionDelay;
236c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    for( i = 0; i < decisionDelay; i++ ) {
237c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
238c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
239c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
240c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
241c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
242c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    }
243c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
244c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    subfr = 0;
245c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
246c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
247c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Rewhiten with new A coefs */
248c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
249c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                silk_assert( start_idx > 0 );
250c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
251c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
252c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder, psEncC->arch );
253c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
254c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
255c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                NSQ->rewhite_flag = 1;
256c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
257c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
258c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
259c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        silk_nsq_del_dec_scale_states_sse4_1( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
260c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
261c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
262c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        silk_noise_shape_quantizer_del_dec_sse4_1( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
263c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
264c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
265c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
266c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
267c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        x_Q3   += psEncC->subfr_length;
268c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        pulses += psEncC->subfr_length;
269c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        pxq    += psEncC->subfr_length;
270c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
271c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
272c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Find winner */
273c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
274c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    Winner_ind = 0;
275c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
276c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
277c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            RDmin_Q10 = psDelDec[ k ].RD_Q10;
278c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            Winner_ind = k;
279c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
280c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
281c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
282c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Copy final part of signals from winner state to output and long-term filter states */
283c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    psDD = &psDelDec[ Winner_ind ];
284c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    psIndices->Seed = psDD->SeedInit;
285c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    last_smple_idx = smpl_buf_idx + decisionDelay;
286c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
287c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    for( i = 0; i < decisionDelay; i++ ) {
288c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
289c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
290c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
291c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
292c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
293c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
294c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
295c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) );
296c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
297c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Update states */
298c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
299c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ->lagPrev        = pitchL[ psEncC->nb_subfr - 1 ];
300c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
301c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Save quantized speech signal */
302c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
303c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
304c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
305c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    RESTORE_STACK;
306c91ee5b5642fcc4969150f73d5f6848f88bf1638flim}
307c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
308c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/******************************************/
309c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/* Noise shape quantizer for one subframe */
310c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/******************************************/
311c91ee5b5642fcc4969150f73d5f6848f88bf1638flimstatic OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
312c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
313c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
314c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            signalType,             /* I    Signal type                         */
315c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32    x_Q10[],                /* I                                        */
316c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int8           pulses[],               /* O                                        */
317c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int16          xq[],                   /* O                                        */
318c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
319c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
320c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
321c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
322c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
323c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            lag,                    /* I    Pitch lag                           */
324c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
325c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
326c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          LF_shp_Q14,             /* I                                        */
327c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          Gain_Q16,               /* I                                        */
328c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            Lambda_Q10,             /* I                                        */
329c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            offset_Q10,             /* I                                        */
330c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            length,                 /* I    Input length                        */
331c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            subfr,                  /* I    Subframe number                     */
332c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
333c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
334c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            warping_Q16,            /* I                                        */
335c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
336c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
337c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            decisionDelay           /* I                                        */
338c91ee5b5642fcc4969150f73d5f6848f88bf1638flim)
339c91ee5b5642fcc4969150f73d5f6848f88bf1638flim{
340c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
341c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32   Winner_rand_state;
342c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
343c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
344c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
345c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
346c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
347c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    VARDECL( NSQ_sample_pair, psSampleState );
348c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ_del_dec_struct *psDD;
349c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ_sample_struct  *psSS;
350c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
351c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    __m128i a_Q12_0123, a_Q12_4567, a_Q12_89AB, a_Q12_CDEF;
352c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    __m128i b_Q12_0123, b_sr_Q12_0123;
353c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    SAVE_STACK;
354c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
355c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_assert( nStatesDelayedDecision > 0 );
356c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
357c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
358c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
359c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
360c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
361c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
362c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    a_Q12_0123 = OP_CVTEPI16_EPI32_M64( a_Q12 );
363c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    a_Q12_4567 = OP_CVTEPI16_EPI32_M64( a_Q12 + 4 );
364c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
365c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    if( opus_likely( predictLPCOrder == 16 ) ) {
366c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        a_Q12_89AB = OP_CVTEPI16_EPI32_M64( a_Q12 + 8 );
367c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        a_Q12_CDEF = OP_CVTEPI16_EPI32_M64( a_Q12 + 12 );
368c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
369c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
370c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    if( signalType == TYPE_VOICED ){
371c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        b_Q12_0123 = OP_CVTEPI16_EPI32_M64( b_Q14 );
372c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        b_sr_Q12_0123 = _mm_shuffle_epi32( b_Q12_0123, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
373c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
374c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    for( i = 0; i < length; i++ ) {
375c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Perform common calculations used in all states */
376c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
377c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Long-term prediction */
378c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        if( signalType == TYPE_VOICED ) {
379c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* Unrolled loop */
380c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
381c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            LTP_pred_Q14 = 2;
382c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            {
383c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                __m128i tmpa, tmpb, pred_lag_ptr_tmp;
384c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                pred_lag_ptr_tmp    = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
385c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                pred_lag_ptr_tmp    = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B );
386c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpa                = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 );
387c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpa                = _mm_srli_si128( tmpa, 2 );
388c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
389c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) );/* equal shift right 4 bytes */
390c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                pred_lag_ptr_tmp    = _mm_mul_epi32( pred_lag_ptr_tmp, b_sr_Q12_0123 );
391c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                pred_lag_ptr_tmp    = _mm_srli_si128( pred_lag_ptr_tmp, 2 );
392c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                pred_lag_ptr_tmp    = _mm_add_epi32( pred_lag_ptr_tmp, tmpa );
393c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
394c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpb = _mm_shuffle_epi32( pred_lag_ptr_tmp, _MM_SHUFFLE( 0, 0, 3, 2 ) );/* equal shift right 8 bytes */
395c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                pred_lag_ptr_tmp    = _mm_add_epi32( pred_lag_ptr_tmp, tmpb );
396c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                LTP_pred_Q14        += _mm_cvtsi128_si32( pred_lag_ptr_tmp );
397c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
398c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
399c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                          /* Q13 -> Q14 */
400c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                pred_lag_ptr++;
401c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
402c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        } else {
403c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            LTP_pred_Q14 = 0;
404c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
405c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
406c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Long-term shaping */
407c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        if( lag > 0 ) {
408c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* Symmetric, packed FIR coefficients */
409c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
410c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
411c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */
412c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            shp_lag_ptr++;
413c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        } else {
414c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            n_LTP_Q14 = 0;
415c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
416c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        {
417c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            __m128i tmpa, tmpb, psLPC_Q14_tmp, a_Q12_tmp;
418c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
419c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            for( k = 0; k < nStatesDelayedDecision; k++ ) {
420c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Delayed decision state */
421c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psDD = &psDelDec[ k ];
422c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
423c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Sample state */
424c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS = psSampleState[ k ];
425c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
426c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Generate dither */
427c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psDD->Seed = silk_RAND( psDD->Seed );
428c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
429c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Pointer used in short term prediction and shaping */
430c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
431c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Short-term prediction */
432c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
433c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
434c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
435c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
436c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpb = _mm_setzero_si128();
437c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
438c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* step 1 */
439c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */
440c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );      /* 0, -1, -2, -3 */
441c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 );    /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */
442c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
443c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpa            = _mm_srli_epi64( tmpa, 16 );
444c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpb            = _mm_add_epi32( tmpb, tmpa );
445c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
446c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
447c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                a_Q12_tmp = _mm_shuffle_epi32( a_Q12_0123, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
448c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp ); /* 1*-1, 3*-3 */
449c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
450c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
451c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
452c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* step 2 */
453c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) );
454c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
455c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 );
456c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpa            = _mm_srli_epi64( tmpa, 16 );
457c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpb            = _mm_add_epi32( tmpb, tmpa );
458c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
459c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
460c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                a_Q12_tmp = _mm_shuffle_epi32( a_Q12_4567, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
461c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
462c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
463c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
464c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
465c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                if ( opus_likely( predictLPCOrder == 16 ) )
466c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                {
467c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* step 3 */
468c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) );
469c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
470c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB );
471c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpa            = _mm_srli_epi64( tmpa, 16 );
472c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpb            = _mm_add_epi32( tmpb, tmpa );
473c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
474c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
475c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    a_Q12_tmp = _mm_shuffle_epi32( a_Q12_89AB, _MM_SHUFFLE(0, 3, 2, 1 ) );/* equal shift right 4 bytes */
476c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
477c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
478c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
479c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
480c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* step 4 */
481c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) );
482c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
483c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF );
484c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpa            = _mm_srli_epi64( tmpa, 16 );
485c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpb            = _mm_add_epi32( tmpb, tmpa );
486c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
487c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, _MM_SHUFFLE( 0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
488c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    a_Q12_tmp = _mm_shuffle_epi32( a_Q12_CDEF, _MM_SHUFFLE(0, 3, 2, 1 ) ); /* equal shift right 4 bytes */
489c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp   = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_tmp );
490c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psLPC_Q14_tmp   = _mm_srli_epi64( psLPC_Q14_tmp, 16 );
491c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
492c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
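493c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* Final reduction: fold the upper 64 bits of the accumulator onto the lower 64 bits */
494c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* (the shuffle below is equivalent to a right shift by 8 bytes) */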
495c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpa            = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) );
496c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpb            = _mm_add_epi32( tmpb, tmpa );
497c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    LPC_pred_Q14    += _mm_cvtsi128_si32( tmpb );
498c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
499c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                else
500c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                {
501c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* add at last */
502c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpa            = _mm_shuffle_epi32( tmpb, _MM_SHUFFLE( 0, 0, 3, 2 ) ); /* equal shift right 8 bytes*/
503c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmpb            = _mm_add_epi32( tmpb, tmpa );
504c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    LPC_pred_Q14    += _mm_cvtsi128_si32( tmpb );
505c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
506c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
507c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
508c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
509c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
510c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
511c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
512c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Noise shape feedback */
513c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
514c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Output of lowpass section */
515c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
516c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Output of allpass section */
517c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
518c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psDD->sAR2_Q14[ 0 ] = tmp2;
519c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
520c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
521c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Loop over allpass sections */
522c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                for( j = 2; j < shapingLPCOrder; j += 2 ) {
523c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* Output of allpass section */
524c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
525c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psDD->sAR2_Q14[ j - 1 ] = tmp1;
526c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
527c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    /* Output of allpass section */
528c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
529c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psDD->sAR2_Q14[ j + 0 ] = tmp2;
530c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
531c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
532c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
533c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
534c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
535c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );                                      /* Q11 -> Q12 */
536c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );              /* Q12 */
537c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );                                      /* Q12 -> Q14 */
538c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
539c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 );     /* Q12 */
540c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );            /* Q12 */
541c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );                                      /* Q12 -> Q14 */
542c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
543c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Input minus prediction plus noise feedback                       */
544c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
545c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );                                    /* Q14 */
546c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );                               /* Q13 */
547c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmp1 = silk_SUB32( tmp2, tmp1 );                                            /* Q13 */
548c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );                                        /* Q10 */
549c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
550c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );                                     /* residual error Q10 */
551c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
552c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Flip sign depending on dither */
553c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                if ( psDD->Seed < 0 ) {
554c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    r_Q10 = -r_Q10;
555c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
556c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
557c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
558c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Find two quantization level candidates and measure their rate-distortion */
559c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
560c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
561c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                if( q1_Q0 > 0 ) {
562c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
563c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
564c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q2_Q10  = silk_ADD32( q1_Q10, 1024 );
565c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
566c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
567c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                } else if( q1_Q0 == 0 ) {
568c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q1_Q10  = offset_Q10;
569c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
570c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
571c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
572c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                } else if( q1_Q0 == -1 ) {
573c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q2_Q10  = offset_Q10;
574c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
575c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
576c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    rd2_Q10 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
577c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                } else {            /* q1_Q0 < -1 */
578c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
579c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
580c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    q2_Q10  = silk_ADD32( q1_Q10, 1024 );
581c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
582c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
583c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
584c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
585c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
586c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
587c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
588c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
589c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                if( rd1_Q10 < rd2_Q10 ) {
590c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
591c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
592c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psSS[ 0 ].Q_Q10  = q1_Q10;
593c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psSS[ 1 ].Q_Q10  = q2_Q10;
594c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                } else {
595c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
596c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
597c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psSS[ 0 ].Q_Q10  = q2_Q10;
598c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psSS[ 1 ].Q_Q10  = q1_Q10;
599c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
600c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
601c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Update states for best quantization */
602c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
603c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Quantized excitation */
604c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
605c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                if ( psDD->Seed < 0 ) {
606c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    exc_Q14 = -exc_Q14;
607c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
608c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
609c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Add predictions */
610c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
611c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
612c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
613c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Update states */
614c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
615c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
616c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS[ 0 ].LF_AR_Q14    = sLF_AR_shp_Q14;
617c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS[ 0 ].LPC_exc_Q14  = LPC_exc_Q14;
618c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS[ 0 ].xq_Q14       = xq_Q14;
619c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
620c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Update states for second best quantization */
621c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
622c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Quantized excitation */
623c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
624c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                if ( psDD->Seed < 0 ) {
625c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    exc_Q14 = -exc_Q14;
626c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
627c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
628c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
629c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Add predictions */
630c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
631c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
632c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
633c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Update states */
634c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
635c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
636c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS[ 1 ].LF_AR_Q14    = sLF_AR_shp_Q14;
637c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS[ 1 ].LPC_exc_Q14  = LPC_exc_Q14;
638c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSS[ 1 ].xq_Q14       = xq_Q14;
639c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
640c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
641c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;                   /* Index to newest samples              */
642c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK;       /* Index to decisionDelay old samples   */
643c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
644c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Find winner */
645c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
646c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        Winner_ind = 0;
647c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        for( k = 1; k < nStatesDelayedDecision; k++ ) {
648c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
649c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                RDmin_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
650c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                Winner_ind = k;
651c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
652c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
653c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
654c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Increase RD values of expired states */
655c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
656c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        for( k = 0; k < nStatesDelayedDecision; k++ ) {
657c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
658c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
659c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
660c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
661c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
662c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
663c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
664c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Find worst in first set and best in second set */
665c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
666c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
667c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        RDmax_ind = 0;
668c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        RDmin_ind = 0;
669c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        for( k = 1; k < nStatesDelayedDecision; k++ ) {
670c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* find worst in first set */
671c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
672c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
673c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                RDmax_ind = k;
674c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
675c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* find best in second set */
676c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
677c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
678c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                RDmin_ind = k;
679c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
680c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
681c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
682c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Replace a state if best from second set outperforms worst in first set */
683c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        if( RDmin_Q10 < RDmax_Q10 ) {
684c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
685c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                         ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
686c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
687c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
688c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
689c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Write samples from winner to output and long-term filter states */
690c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        psDD = &psDelDec[ Winner_ind ];
691c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        if( subfr > 0 || i >= decisionDelay ) {
692c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            pulses[  i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
693c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
694c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
695c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
696c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            sLTP_Q15[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q15[  last_smple_idx ];
697c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
698c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        NSQ->sLTP_shp_buf_idx++;
699c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        NSQ->sLTP_buf_idx++;
700c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
701c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Update states */
702c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        for( k = 0; k < nStatesDelayedDecision; k++ ) {
703c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD                                     = &psDelDec[ k ];
704c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psSS                                     = &psSampleState[ k ][ 0 ];
705c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->LF_AR_Q14                          = psSS->LF_AR_Q14;
706c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
707c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->Xq_Q14[    *smpl_buf_idx ]         = psSS->xq_Q14;
708c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
709c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->Pred_Q15[  *smpl_buf_idx ]         = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
710c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->Shape_Q14[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q14;
711c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->Seed                               = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
712c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
713c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            psDD->RD_Q10                             = psSS->RD_Q10;
714c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
715c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        delayedGain_Q10[     *smpl_buf_idx ]         = Gain_Q10;
716c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
717c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Update LPC states */
718c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    for( k = 0; k < nStatesDelayedDecision; k++ ) {
719c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        psDD = &psDelDec[ k ];
720c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
721c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
722c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    RESTORE_STACK;
723c91ee5b5642fcc4969150f73d5f6848f88bf1638flim}
724c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
725c91ee5b5642fcc4969150f73d5f6848f88bf1638flimstatic OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
726c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
727c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
728c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
729c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
730c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
731c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
732c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
733c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            subfr,                      /* I    Subframe number                     */
734c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
735c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
736c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
737c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
738c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int      signal_type,                /* I    Signal type                         */
739c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    const opus_int      decisionDelay               /* I    Decision delay                      */
740c91ee5b5642fcc4969150f73d5f6848f88bf1638flim)
741c91ee5b5642fcc4969150f73d5f6848f88bf1638flim{
742c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int            i, k, lag;
743c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    opus_int32          gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
744c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ_del_dec_struct  *psDD;
745c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    __m128i xmm_inv_gain_Q23, xmm_x_Q3_x2x0, xmm_x_Q3_x3x1;
746c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
747c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    lag          = pitchL[ subfr ];
748c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
749c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
750c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    silk_assert( inv_gain_Q31 != 0 );
751c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
752c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Calculate gain adjustment factor */
753c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
754c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
755c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    } else {
756c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        gain_adj_Q16 = (opus_int32)1 << 16;
757c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
758c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
759c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Scale input */
760c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
761c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
762c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* prepare inv_gain_Q23 in packed 4 32-bits */
763c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    xmm_inv_gain_Q23 = _mm_set1_epi32(inv_gain_Q23);
764c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
765c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    for( i = 0; i < psEncC->subfr_length - 3; i += 4 ) {
766c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        xmm_x_Q3_x2x0 = _mm_loadu_si128( (__m128i *)(&(x_Q3[ i ] ) ) );
767c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* equal shift right 4 bytes*/
768c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        xmm_x_Q3_x3x1 = _mm_shuffle_epi32( xmm_x_Q3_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
769c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
770c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        xmm_x_Q3_x2x0 = _mm_mul_epi32( xmm_x_Q3_x2x0, xmm_inv_gain_Q23 );
771c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        xmm_x_Q3_x3x1 = _mm_mul_epi32( xmm_x_Q3_x3x1, xmm_inv_gain_Q23 );
772c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
773c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        xmm_x_Q3_x2x0 = _mm_srli_epi64( xmm_x_Q3_x2x0, 16 );
774c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        xmm_x_Q3_x3x1 = _mm_slli_epi64( xmm_x_Q3_x3x1, 16 );
775c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
776c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        xmm_x_Q3_x2x0 = _mm_blend_epi16( xmm_x_Q3_x2x0, xmm_x_Q3_x3x1, 0xCC );
777c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
778c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ])), xmm_x_Q3_x2x0 );
779c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
780c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
781c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    for( ; i < psEncC->subfr_length; i++ ) {
782c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
783c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
784c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
785c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Save inverse gain */
786c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
787c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
788c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
789c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    if( NSQ->rewhite_flag ) {
790c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        if( subfr == 0 ) {
791c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* Do LTP downscaling */
792c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
793c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
794c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
795c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            silk_assert( i < MAX_FRAME_LENGTH );
796c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
797c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
798c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
799c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
800c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    /* Adjust for changing gain */
801c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
802c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        /* Scale long-term shaping state */
803c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        {
804c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            __m128i xmm_gain_adj_Q16, xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1;
805c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
806c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* prepare gain_adj_Q16 in packed 4 32-bits */
807c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            xmm_gain_adj_Q16 = _mm_set1_epi32( gain_adj_Q16 );
808c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
809c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 )
810c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            {
811c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
812c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* equal shift right 4 bytes*/
813c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
814c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
815c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xmm_sLTP_shp_Q14_x2x0 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x2x0, xmm_gain_adj_Q16 );
816c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xmm_sLTP_shp_Q14_x3x1 = _mm_mul_epi32( xmm_sLTP_shp_Q14_x3x1, xmm_gain_adj_Q16 );
817c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
818c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xmm_sLTP_shp_Q14_x2x0 = _mm_srli_epi64( xmm_sLTP_shp_Q14_x2x0, 16 );
819c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xmm_sLTP_shp_Q14_x3x1 = _mm_slli_epi64( xmm_sLTP_shp_Q14_x3x1, 16 );
820c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
821c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
822c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
823c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
824c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
825c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
826c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
827c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
828c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
829c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
830c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            /* Scale long-term prediction state */
831c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
832c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) {
833c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
834c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
835c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
836c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
837c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            for( k = 0; k < nStatesDelayedDecision; k++ ) {
838c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psDD = &psDelDec[ k ];
839c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
840c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Scale scalar states */
841c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
842c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
843c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                /* Scale short-term prediction and shaping states */
844c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
845c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] );
846c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
847c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
848c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] );
849c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
850c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                for( i = 0; i < DECISION_DELAY; i++ ) {
851c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psDD->Pred_Q15[  i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[  i ] );
852c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                    psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
853c91ee5b5642fcc4969150f73d5f6848f88bf1638flim                }
854c91ee5b5642fcc4969150f73d5f6848f88bf1638flim            }
855c91ee5b5642fcc4969150f73d5f6848f88bf1638flim        }
856c91ee5b5642fcc4969150f73d5f6848f88bf1638flim    }
857c91ee5b5642fcc4969150f73d5f6848f88bf1638flim}
858