1/***********************************************************************
2Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3Redistribution and use in source and binary forms, with or without
4modification, are permitted provided that the following conditions
5are met:
6- Redistributions of source code must retain the above copyright notice,
7this list of conditions and the following disclaimer.
8- Redistributions in binary form must reproduce the above copyright
9notice, this list of conditions and the following disclaimer in the
10documentation and/or other materials provided with the distribution.
11- Neither the name of Internet Society, IETF or IETF Trust, nor the
12names of specific contributors, may be used to endorse or promote
13products derived from this software without specific prior written
14permission.
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25POSSIBILITY OF SUCH DAMAGE.
26***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "main.h"
33#include "stack_alloc.h"
34
35typedef struct {
36    opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
37    opus_int32 RandState[ DECISION_DELAY ];
38    opus_int32 Q_Q10[     DECISION_DELAY ];
39    opus_int32 Xq_Q14[    DECISION_DELAY ];
40    opus_int32 Pred_Q15[  DECISION_DELAY ];
41    opus_int32 Shape_Q14[ DECISION_DELAY ];
42    opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
43    opus_int32 LF_AR_Q14;
44    opus_int32 Seed;
45    opus_int32 SeedInit;
46    opus_int32 RD_Q10;
47} NSQ_del_dec_struct;
48
49typedef struct {
50    opus_int32 Q_Q10;
51    opus_int32 RD_Q10;
52    opus_int32 xq_Q14;
53    opus_int32 LF_AR_Q14;
54    opus_int32 sLTP_shp_Q14;
55    opus_int32 LPC_exc_Q14;
56} NSQ_sample_struct;
57
58typedef NSQ_sample_struct  NSQ_sample_pair[ 2 ];
59
60static OPUS_INLINE void silk_nsq_del_dec_scale_states(
61    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
62    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
63    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
64    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
65    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
66    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
67    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
68    opus_int            subfr,                      /* I    Subframe number                     */
69    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
70    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
71    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
72    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
73    const opus_int      signal_type,                /* I    Signal type                         */
74    const opus_int      decisionDelay               /* I    Decision delay                      */
75);
76
77/******************************************/
78/* Noise shape quantizer for one subframe */
79/******************************************/
80static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
81    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
82    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
83    opus_int            signalType,             /* I    Signal type                         */
84    const opus_int32    x_Q10[],                /* I                                        */
85    opus_int8           pulses[],               /* O                                        */
86    opus_int16          xq[],                   /* O                                        */
87    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
88    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
89    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
90    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
91    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
92    opus_int            lag,                    /* I    Pitch lag                           */
93    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
94    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
95    opus_int32          LF_shp_Q14,             /* I                                        */
96    opus_int32          Gain_Q16,               /* I                                        */
97    opus_int            Lambda_Q10,             /* I                                        */
98    opus_int            offset_Q10,             /* I                                        */
99    opus_int            length,                 /* I    Input length                        */
100    opus_int            subfr,                  /* I    Subframe number                     */
101    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
102    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
103    opus_int            warping_Q16,            /* I                                        */
104    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
105    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
106    opus_int            decisionDelay           /* I                                        */
107);
108
109void silk_NSQ_del_dec(
110    const silk_encoder_state    *psEncC,                                    /* I/O  Encoder State                   */
111    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
112    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
113    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
114    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
115    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
116    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
117    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
118    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
119    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
120    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
121    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
122    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
123    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
124    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
125)
126{
127    opus_int            i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
128    opus_int            last_smple_idx, smpl_buf_idx, decisionDelay;
129    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
130    opus_int16          *pxq;
131    VARDECL( opus_int32, sLTP_Q15 );
132    VARDECL( opus_int16, sLTP );
133    opus_int32          HarmShapeFIRPacked_Q14;
134    opus_int            offset_Q10;
135    opus_int32          RDmin_Q10, Gain_Q10;
136    VARDECL( opus_int32, x_sc_Q10 );
137    VARDECL( opus_int32, delayedGain_Q10 );
138    VARDECL( NSQ_del_dec_struct, psDelDec );
139    NSQ_del_dec_struct  *psDD;
140    SAVE_STACK;
141
142    /* Set unvoiced lag to the previous one, overwrite later for voiced */
143    lag = NSQ->lagPrev;
144
145    silk_assert( NSQ->prev_gain_Q16 != 0 );
146
147    /* Initialize delayed decision states */
148    ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct );
149    silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
150    for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
151        psDD                 = &psDelDec[ k ];
152        psDD->Seed           = ( k + psIndices->Seed ) & 3;
153        psDD->SeedInit       = psDD->Seed;
154        psDD->RD_Q10         = 0;
155        psDD->LF_AR_Q14      = NSQ->sLF_AR_shp_Q14;
156        psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ];
157        silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
158        silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
159    }
160
161    offset_Q10   = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
162    smpl_buf_idx = 0; /* index of oldest samples */
163
164    decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length );
165
166    /* For voiced frames limit the decision delay to lower than the pitch lag */
167    if( psIndices->signalType == TYPE_VOICED ) {
168        for( k = 0; k < psEncC->nb_subfr; k++ ) {
169            decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 );
170        }
171    } else {
172        if( lag > 0 ) {
173            decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 );
174        }
175    }
176
177    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
178        LSF_interpolation_flag = 0;
179    } else {
180        LSF_interpolation_flag = 1;
181    }
182
183    ALLOC( sLTP_Q15,
184           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
185    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
186    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
187    ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 );
188    /* Set up pointers to start of sub frame */
189    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
190    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
191    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
192    subfr = 0;
193    for( k = 0; k < psEncC->nb_subfr; k++ ) {
194        A_Q12      = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
195        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER           ];
196        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];
197
198        /* Noise shape parameters */
199        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
200        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
201        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
202
203        NSQ->rewhite_flag = 0;
204        if( psIndices->signalType == TYPE_VOICED ) {
205            /* Voiced */
206            lag = pitchL[ k ];
207
208            /* Re-whitening */
209            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
210                if( k == 2 ) {
211                    /* RESET DELAYED DECISIONS */
212                    /* Find winner */
213                    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
214                    Winner_ind = 0;
215                    for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
216                        if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
217                            RDmin_Q10 = psDelDec[ i ].RD_Q10;
218                            Winner_ind = i;
219                        }
220                    }
221                    for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
222                        if( i != Winner_ind ) {
223                            psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 );
224                            silk_assert( psDelDec[ i ].RD_Q10 >= 0 );
225                        }
226                    }
227
228                    /* Copy final part of signals from winner state to output and long-term filter states */
229                    psDD = &psDelDec[ Winner_ind ];
230                    last_smple_idx = smpl_buf_idx + decisionDelay;
231                    for( i = 0; i < decisionDelay; i++ ) {
232                        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
233                        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
234                        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
235                            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) );
236                        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
237                    }
238
239                    subfr = 0;
240                }
241
242                /* Rewhiten with new A coefs */
243                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
244                silk_assert( start_idx > 0 );
245
246                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
247                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
248
249                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
250                NSQ->rewhite_flag = 1;
251            }
252        }
253
254        silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k,
255            psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );
256
257        silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
258            delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
259            Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
260            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
261
262        x_Q3   += psEncC->subfr_length;
263        pulses += psEncC->subfr_length;
264        pxq    += psEncC->subfr_length;
265    }
266
267    /* Find winner */
268    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
269    Winner_ind = 0;
270    for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
271        if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
272            RDmin_Q10 = psDelDec[ k ].RD_Q10;
273            Winner_ind = k;
274        }
275    }
276
277    /* Copy final part of signals from winner state to output and long-term filter states */
278    psDD = &psDelDec[ Winner_ind ];
279    psIndices->Seed = psDD->SeedInit;
280    last_smple_idx = smpl_buf_idx + decisionDelay;
281    Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 );
282    for( i = 0; i < decisionDelay; i++ ) {
283        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
284        pulses[   i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
285        pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
286            silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) );
287        NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ];
288    }
289    silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
290    silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) );
291
292    /* Update states */
293    NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14;
294    NSQ->lagPrev        = pitchL[ psEncC->nb_subfr - 1 ];
295
296    /* Save quantized speech signal */
297    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */
298    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
299    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
300    RESTORE_STACK;
301}
302
303/******************************************/
304/* Noise shape quantizer for one subframe */
305/******************************************/
306static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
307    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
308    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
309    opus_int            signalType,             /* I    Signal type                         */
310    const opus_int32    x_Q10[],                /* I                                        */
311    opus_int8           pulses[],               /* O                                        */
312    opus_int16          xq[],                   /* O                                        */
313    opus_int32          sLTP_Q15[],             /* I/O  LTP filter state                    */
314    opus_int32          delayedGain_Q10[],      /* I/O  Gain delay buffer                   */
315    const opus_int16    a_Q12[],                /* I    Short term prediction coefs         */
316    const opus_int16    b_Q14[],                /* I    Long term prediction coefs          */
317    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping coefs                 */
318    opus_int            lag,                    /* I    Pitch lag                           */
319    opus_int32          HarmShapeFIRPacked_Q14, /* I                                        */
320    opus_int            Tilt_Q14,               /* I    Spectral tilt                       */
321    opus_int32          LF_shp_Q14,             /* I                                        */
322    opus_int32          Gain_Q16,               /* I                                        */
323    opus_int            Lambda_Q10,             /* I                                        */
324    opus_int            offset_Q10,             /* I                                        */
325    opus_int            length,                 /* I    Input length                        */
326    opus_int            subfr,                  /* I    Subframe number                     */
327    opus_int            shapingLPCOrder,        /* I    Shaping LPC filter order            */
328    opus_int            predictLPCOrder,        /* I    Prediction filter order             */
329    opus_int            warping_Q16,            /* I                                        */
330    opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
331    opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
332    opus_int            decisionDelay           /* I                                        */
333)
334{
335    opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
336    opus_int32   Winner_rand_state;
337    opus_int32   LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
338    opus_int32   n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
339    opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
340    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
341    opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
342    VARDECL( NSQ_sample_pair, psSampleState );
343    NSQ_del_dec_struct *psDD;
344    NSQ_sample_struct  *psSS;
345    SAVE_STACK;
346
347    silk_assert( nStatesDelayedDecision > 0 );
348    ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair );
349
350    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
351    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
352    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
353
354    for( i = 0; i < length; i++ ) {
355        /* Perform common calculations used in all states */
356
357        /* Long-term prediction */
358        if( signalType == TYPE_VOICED ) {
359            /* Unrolled loop */
360            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
361            LTP_pred_Q14 = 2;
362            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[  0 ], b_Q14[ 0 ] );
363            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
364            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
365            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
366            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
367            LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 );                          /* Q13 -> Q14 */
368            pred_lag_ptr++;
369        } else {
370            LTP_pred_Q14 = 0;
371        }
372
373        /* Long-term shaping */
374        if( lag > 0 ) {
375            /* Symmetric, packed FIR coefficients */
376            n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
377            n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                      HarmShapeFIRPacked_Q14 );
378            n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 );            /* Q12 -> Q14 */
379            shp_lag_ptr++;
380        } else {
381            n_LTP_Q14 = 0;
382        }
383
384        for( k = 0; k < nStatesDelayedDecision; k++ ) {
385            /* Delayed decision state */
386            psDD = &psDelDec[ k ];
387
388            /* Sample state */
389            psSS = psSampleState[ k ];
390
391            /* Generate dither */
392            psDD->Seed = silk_RAND( psDD->Seed );
393
394            /* Pointer used in short term prediction and shaping */
395            psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
396            /* Short-term prediction */
397            silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
398            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
399            LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
400            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[  0 ], a_Q12[ 0 ] );
401            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
402            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
403            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
404            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
405            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
406            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
407            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
408            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
409            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
410            if( predictLPCOrder == 16 ) {
411                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
412                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
413                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
414                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
415                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
416                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
417            }
418            LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
419
420            /* Noise shape feedback */
421            silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
422            /* Output of lowpass section */
423            tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
424            /* Output of allpass section */
425            tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
426            psDD->sAR2_Q14[ 0 ] = tmp2;
427            n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 );
428            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] );
429            /* Loop over allpass sections */
430            for( j = 2; j < shapingLPCOrder; j += 2 ) {
431                /* Output of allpass section */
432                tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
433                psDD->sAR2_Q14[ j - 1 ] = tmp1;
434                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] );
435                /* Output of allpass section */
436                tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
437                psDD->sAR2_Q14[ j + 0 ] = tmp2;
438                n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] );
439            }
440            psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
441            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
442
443            n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 );                                      /* Q11 -> Q12 */
444            n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 );              /* Q12 */
445            n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 );                                      /* Q12 -> Q14 */
446
447            n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 );     /* Q12 */
448            n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 );            /* Q12 */
449            n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 );                                      /* Q12 -> Q14 */
450
451            /* Input minus prediction plus noise feedback                       */
452            /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
453            tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 );                                    /* Q14 */
454            tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 );                               /* Q13 */
455            tmp1 = silk_SUB32( tmp2, tmp1 );                                            /* Q13 */
456            tmp1 = silk_RSHIFT_ROUND( tmp1, 4 );                                        /* Q10 */
457
458            r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );                                     /* residual error Q10 */
459
460            /* Flip sign depending on dither */
461            if ( psDD->Seed < 0 ) {
462                r_Q10 = -r_Q10;
463            }
464            r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
465
466            /* Find two quantization level candidates and measure their rate-distortion */
467            q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
468            q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
469            if( q1_Q0 > 0 ) {
470                q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
471                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
472                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
473                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
474                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
475            } else if( q1_Q0 == 0 ) {
476                q1_Q10  = offset_Q10;
477                q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
478                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
479                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
480            } else if( q1_Q0 == -1 ) {
481                q2_Q10  = offset_Q10;
482                q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
483                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
484                rd2_Q10 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
485            } else {            /* q1_Q0 < -1 */
486                q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
487                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
488                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
489                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
490                rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
491            }
492            rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
493            rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
494            rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
495            rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
496
497            if( rd1_Q10 < rd2_Q10 ) {
498                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
499                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
500                psSS[ 0 ].Q_Q10  = q1_Q10;
501                psSS[ 1 ].Q_Q10  = q2_Q10;
502            } else {
503                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
504                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
505                psSS[ 0 ].Q_Q10  = q2_Q10;
506                psSS[ 1 ].Q_Q10  = q1_Q10;
507            }
508
509            /* Update states for best quantization */
510
511            /* Quantized excitation */
512            exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
513            if ( psDD->Seed < 0 ) {
514                exc_Q14 = -exc_Q14;
515            }
516
517            /* Add predictions */
518            LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
519            xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
520
521            /* Update states */
522            sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
523            psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
524            psSS[ 0 ].LF_AR_Q14    = sLF_AR_shp_Q14;
525            psSS[ 0 ].LPC_exc_Q14  = LPC_exc_Q14;
526            psSS[ 0 ].xq_Q14       = xq_Q14;
527
528            /* Update states for second best quantization */
529
530            /* Quantized excitation */
531            exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
532            if ( psDD->Seed < 0 ) {
533                exc_Q14 = -exc_Q14;
534            }
535
536
537            /* Add predictions */
538            LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
539            xq_Q14      = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
540
541            /* Update states */
542            sLF_AR_shp_Q14         = silk_SUB32( xq_Q14, n_AR_Q14 );
543            psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
544            psSS[ 1 ].LF_AR_Q14    = sLF_AR_shp_Q14;
545            psSS[ 1 ].LPC_exc_Q14  = LPC_exc_Q14;
546            psSS[ 1 ].xq_Q14       = xq_Q14;
547        }
548
549        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;                   /* Index to newest samples              */
550        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK;       /* Index to decisionDelay old samples   */
551
552        /* Find winner */
553        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
554        Winner_ind = 0;
555        for( k = 1; k < nStatesDelayedDecision; k++ ) {
556            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
557                RDmin_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
558                Winner_ind = k;
559            }
560        }
561
562        /* Increase RD values of expired states */
563        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
564        for( k = 0; k < nStatesDelayedDecision; k++ ) {
565            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
566                psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
567                psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
568                silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
569            }
570        }
571
572        /* Find worst in first set and best in second set */
573        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
574        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
575        RDmax_ind = 0;
576        RDmin_ind = 0;
577        for( k = 1; k < nStatesDelayedDecision; k++ ) {
578            /* find worst in first set */
579            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
580                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
581                RDmax_ind = k;
582            }
583            /* find best in second set */
584            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
585                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
586                RDmin_ind = k;
587            }
588        }
589
590        /* Replace a state if best from second set outperforms worst in first set */
591        if( RDmin_Q10 < RDmax_Q10 ) {
592            silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
593                         ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
594            silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
595        }
596
597        /* Write samples from winner to output and long-term filter states */
598        psDD = &psDelDec[ Winner_ind ];
599        if( subfr > 0 || i >= decisionDelay ) {
600            pulses[  i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
601            xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
602                silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
603            NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
604            sLTP_Q15[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q15[  last_smple_idx ];
605        }
606        NSQ->sLTP_shp_buf_idx++;
607        NSQ->sLTP_buf_idx++;
608
609        /* Update states */
610        for( k = 0; k < nStatesDelayedDecision; k++ ) {
611            psDD                                     = &psDelDec[ k ];
612            psSS                                     = &psSampleState[ k ][ 0 ];
613            psDD->LF_AR_Q14                          = psSS->LF_AR_Q14;
614            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
615            psDD->Xq_Q14[    *smpl_buf_idx ]         = psSS->xq_Q14;
616            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
617            psDD->Pred_Q15[  *smpl_buf_idx ]         = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
618            psDD->Shape_Q14[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q14;
619            psDD->Seed                               = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
620            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
621            psDD->RD_Q10                             = psSS->RD_Q10;
622        }
623        delayedGain_Q10[     *smpl_buf_idx ]         = Gain_Q10;
624    }
625    /* Update LPC states */
626    for( k = 0; k < nStatesDelayedDecision; k++ ) {
627        psDD = &psDelDec[ k ];
628        silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
629    }
630    RESTORE_STACK;
631}
632
633static OPUS_INLINE void silk_nsq_del_dec_scale_states(
634    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
635    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
636    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
637    const opus_int32    x_Q3[],                     /* I    Input in Q3                         */
638    opus_int32          x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
639    const opus_int16    sLTP[],                     /* I    Re-whitened LTP state in Q0         */
640    opus_int32          sLTP_Q15[],                 /* O    LTP state matching scaled input     */
641    opus_int            subfr,                      /* I    Subframe number                     */
642    opus_int            nStatesDelayedDecision,     /* I    Number of del dec states            */
643    const opus_int      LTP_scale_Q14,              /* I    LTP state scaling                   */
644    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
645    const opus_int      pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
646    const opus_int      signal_type,                /* I    Signal type                         */
647    const opus_int      decisionDelay               /* I    Decision delay                      */
648)
649{
650    opus_int            i, k, lag;
651    opus_int32          gain_adj_Q16, inv_gain_Q31, inv_gain_Q23;
652    NSQ_del_dec_struct  *psDD;
653
654    lag          = pitchL[ subfr ];
655    inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 );
656    silk_assert( inv_gain_Q31 != 0 );
657
658    /* Calculate gain adjustment factor */
659    if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) {
660        gain_adj_Q16 =  silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 );
661    } else {
662        gain_adj_Q16 = (opus_int32)1 << 16;
663    }
664
665    /* Scale input */
666    inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 );
667    for( i = 0; i < psEncC->subfr_length; i++ ) {
668        x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 );
669    }
670
671    /* Save inverse gain */
672    NSQ->prev_gain_Q16 = Gains_Q16[ subfr ];
673
674    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
675    if( NSQ->rewhite_flag ) {
676        if( subfr == 0 ) {
677            /* Do LTP downscaling */
678            inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, LTP_scale_Q14 ), 2 );
679        }
680        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
681            silk_assert( i < MAX_FRAME_LENGTH );
682            sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] );
683        }
684    }
685
686    /* Adjust for changing gain */
687    if( gain_adj_Q16 != (opus_int32)1 << 16 ) {
688        /* Scale long-term shaping state */
689        for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
690            NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] );
691        }
692
693        /* Scale long-term prediction state */
694        if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
695            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) {
696                sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] );
697            }
698        }
699
700        for( k = 0; k < nStatesDelayedDecision; k++ ) {
701            psDD = &psDelDec[ k ];
702
703            /* Scale scalar states */
704            psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 );
705
706            /* Scale short-term prediction and shaping states */
707            for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
708                psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] );
709            }
710            for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
711                psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] );
712            }
713            for( i = 0; i < DECISION_DELAY; i++ ) {
714                psDD->Pred_Q15[  i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[  i ] );
715                psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] );
716            }
717        }
718    }
719}
720