1/***********************************************************************
2Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3Redistribution and use in source and binary forms, with or without
4modification, are permitted provided that the following conditions
5are met:
6- Redistributions of source code must retain the above copyright notice,
7this list of conditions and the following disclaimer.
8- Redistributions in binary form must reproduce the above copyright
9notice, this list of conditions and the following disclaimer in the
10documentation and/or other materials provided with the distribution.
11- Neither the name of Internet Society, IETF or IETF Trust, nor the
12names of specific contributors, may be used to endorse or promote
13products derived from this software without specific prior written
14permission.
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25POSSIBILITY OF SUCH DAMAGE.
26***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32/***********************************************************
33* Pitch analyser function
34********************************************************** */
35#include "SigProc_FIX.h"
36#include "pitch_est_defines.h"
37#include "stack_alloc.h"
38#include "debug.h"
39#include "pitch.h"
40
41#define SCRATCH_SIZE    22
42#define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )
43#define SF_LENGTH_8KHZ  ( PE_SUBFR_LENGTH_MS * 8 )
44#define MIN_LAG_4KHZ    ( PE_MIN_LAG_MS * 4 )
45#define MIN_LAG_8KHZ    ( PE_MIN_LAG_MS * 8 )
46#define MAX_LAG_4KHZ    ( PE_MAX_LAG_MS * 4 )
47#define MAX_LAG_8KHZ    ( PE_MAX_LAG_MS * 8 - 1 )
48#define CSTRIDE_4KHZ    ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
49#define CSTRIDE_8KHZ    ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
50#define D_COMP_MIN      ( MIN_LAG_8KHZ - 3 )
51#define D_COMP_MAX      ( MAX_LAG_8KHZ + 4 )
52#define D_COMP_STRIDE   ( D_COMP_MAX - D_COMP_MIN )
53
54typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
55
56/************************************************************/
57/* Internally used functions                                */
58/************************************************************/
59static void silk_P_Ana_calc_corr_st3(
60    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
61    const opus_int16  frame[],                         /* I vector to correlate         */
62    opus_int          start_lag,                       /* I lag offset to search around */
63    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
64    opus_int          nb_subfr,                        /* I number of subframes         */
65    opus_int          complexity,                      /* I Complexity setting          */
66    int               arch                             /* I Run-time architecture       */
67);
68
69static void silk_P_Ana_calc_energy_st3(
70    silk_pe_stage3_vals energies_st3[],                /* O 3 DIM energy array */
71    const opus_int16  frame[],                         /* I vector to calc energy in    */
72    opus_int          start_lag,                       /* I lag offset to search around */
73    opus_int          sf_length,                       /* I length of one 5 ms subframe */
74    opus_int          nb_subfr,                        /* I number of subframes         */
75    opus_int          complexity                       /* I Complexity setting          */
76);
77
78/*************************************************************/
79/*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
80/*************************************************************/
81opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
82    const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
83    opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
84    opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
85    opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
86    opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
87    opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
88    const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
89    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
90    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
91    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
92    const opus_int              nb_subfr,           /* I    number of 5 ms subframes                                    */
93    int                         arch                /* I    Run-time architecture                                       */
94)
95{
96    VARDECL( opus_int16, frame_8kHz );
97    VARDECL( opus_int16, frame_4kHz );
98    opus_int32 filt_state[ 6 ];
99    const opus_int16 *input_frame_ptr;
100    opus_int   i, k, d, j;
101    VARDECL( opus_int16, C );
102    VARDECL( opus_int32, xcorr32 );
103    const opus_int16 *target_ptr, *basis_ptr;
104    opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
105    opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
106    VARDECL( opus_int16, d_comp );
107    opus_int32 sum, threshold, lag_counter;
108    opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
109    opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
110    VARDECL( silk_pe_stage3_vals, energies_st3 );
111    VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
112    opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;
113    opus_int   sf_length;
114    opus_int   min_lag;
115    opus_int   max_lag;
116    opus_int32 contour_bias_Q15, diff;
117    opus_int   nb_cbk_search, cbk_size;
118    opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
119    const opus_int8 *Lag_CB_ptr;
120    SAVE_STACK;
121    /* Check for valid sampling frequency */
122    silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
123
124    /* Check for valid complexity setting */
125    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
126    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
127
128    silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
129    silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
130
131    /* Set up frame lengths max / min lag for the sampling frequency */
132    frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
133    frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
134    frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
135    sf_length         = PE_SUBFR_LENGTH_MS * Fs_kHz;
136    min_lag           = PE_MIN_LAG_MS * Fs_kHz;
137    max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
138
139    /* Resample from input sampled at Fs_kHz to 8 kHz */
140    ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
141    if( Fs_kHz == 16 ) {
142        silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
143        silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
144    } else if( Fs_kHz == 12 ) {
145        silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
146        silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
147    } else {
148        silk_assert( Fs_kHz == 8 );
149        silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
150    }
151
152    /* Decimate again to 4 kHz */
153    silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
154    ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
155    silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
156
157    /* Low-pass filter */
158    for( i = frame_length_4kHz - 1; i > 0; i-- ) {
159        frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
160    }
161
162    /*******************************************************************************
163    ** Scale 4 kHz signal down to prevent correlations measures from overflowing
164    ** find scaling as max scaling for each 8kHz(?) subframe
165    *******************************************************************************/
166
167    /* Inner product is calculated with different lengths, so scale for the worst case */
168    silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
169    if( shift > 0 ) {
170        shift = silk_RSHIFT( shift, 1 );
171        for( i = 0; i < frame_length_4kHz; i++ ) {
172            frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
173        }
174    }
175
176    /******************************************************************************
177    * FIRST STAGE, operating in 4 khz
178    ******************************************************************************/
179    ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
180    ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
181    silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
182    target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
183    for( k = 0; k < nb_subfr >> 1; k++ ) {
184        /* Check that we are within range of the array */
185        silk_assert( target_ptr >= frame_4kHz );
186        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
187
188        basis_ptr = target_ptr - MIN_LAG_4KHZ;
189
190        /* Check that we are within range of the array */
191        silk_assert( basis_ptr >= frame_4kHz );
192        silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
193
194        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
195
196        /* Calculate first vector products before loop */
197        cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
198        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
199        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ ) );
200        normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
201
202        matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
203            (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                      /* Q13 */
204
205        /* From now on normalizer is computed recursively */
206        for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
207            basis_ptr--;
208
209            /* Check that we are within range of the array */
210            silk_assert( basis_ptr >= frame_4kHz );
211            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
212
213            cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
214
215            /* Add contribution of new sample and remove contribution from oldest sample */
216            normalizer = silk_ADD32( normalizer,
217                silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
218                silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
219
220            matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
221                (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );                  /* Q13 */
222        }
223        /* Update target pointer */
224        target_ptr += SF_LENGTH_8KHZ;
225    }
226
227    /* Combine two subframes into single correlation measure and apply short-lag bias */
228    if( nb_subfr == PE_MAX_NB_SUBFR ) {
229        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
230            sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
231                + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ );               /* Q14 */
232            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
233            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
234        }
235    } else {
236        /* Only short-lag bias */
237        for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
238            sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 );                          /* Q14 */
239            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */
240            C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum;                                            /* Q14 */
241        }
242    }
243
244    /* Sort */
245    length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
246    silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
247    silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
248                                          length_d_srch );
249
250    /* Escape if correlation is very low already here */
251    Cmax = (opus_int)C[ 0 ];                                                    /* Q14 */
252    if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
253        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
254        *LTPCorr_Q15  = 0;
255        *lagIndex     = 0;
256        *contourIndex = 0;
257        RESTORE_STACK;
258        return 1;
259    }
260
261    threshold = silk_SMULWB( search_thres1_Q16, Cmax );
262    for( i = 0; i < length_d_srch; i++ ) {
263        /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
264        if( C[ i ] > threshold ) {
265            d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
266        } else {
267            length_d_srch = i;
268            break;
269        }
270    }
271    silk_assert( length_d_srch > 0 );
272
273    ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
274    for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
275        d_comp[ i - D_COMP_MIN ] = 0;
276    }
277    for( i = 0; i < length_d_srch; i++ ) {
278        d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
279    }
280
281    /* Convolution */
282    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
283        d_comp[ i - D_COMP_MIN ] +=
284            d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
285    }
286
287    length_d_srch = 0;
288    for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
289        if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
290            d_srch[ length_d_srch ] = i;
291            length_d_srch++;
292        }
293    }
294
295    /* Convolution */
296    for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
297        d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
298            + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
299    }
300
301    length_d_comp = 0;
302    for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
303        if( d_comp[ i - D_COMP_MIN ] > 0 ) {
304            d_comp[ length_d_comp ] = i - 2;
305            length_d_comp++;
306        }
307    }
308
309    /**********************************************************************************
310    ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
311    *************************************************************************************/
312
313    /******************************************************************************
314    ** Scale signal down to avoid correlations measures from overflowing
315    *******************************************************************************/
316    /* find scaling as max scaling for each subframe */
317    silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
318    if( shift > 0 ) {
319        shift = silk_RSHIFT( shift, 1 );
320        for( i = 0; i < frame_length_8kHz; i++ ) {
321            frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
322        }
323    }
324
325    /*********************************************************************************
326    * Find energy of each subframe projected onto its history, for a range of delays
327    *********************************************************************************/
328    silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
329
330    target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
331    for( k = 0; k < nb_subfr; k++ ) {
332
333        /* Check that we are within range of the array */
334        silk_assert( target_ptr >= frame_8kHz );
335        silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
336
337        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
338        for( j = 0; j < length_d_comp; j++ ) {
339            d = d_comp[ j ];
340            basis_ptr = target_ptr - d;
341
342            /* Check that we are within range of the array */
343            silk_assert( basis_ptr >= frame_8kHz );
344            silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
345
346            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
347            if( cross_corr > 0 ) {
348                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
349                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
350                    (opus_int16)silk_DIV32_varQ( cross_corr,
351                                                 silk_ADD32( energy_target,
352                                                             energy_basis ),
353                                                 13 + 1 );                                      /* Q13 */
354            } else {
355                matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
356            }
357        }
358        target_ptr += SF_LENGTH_8KHZ;
359    }
360
361    /* search over lag range and lags codebook */
362    /* scale factor for lag codebook, as a function of center lag */
363
364    CCmax   = silk_int32_MIN;
365    CCmax_b = silk_int32_MIN;
366
367    CBimax = 0; /* To avoid returning undefined lag values */
368    lag = -1;   /* To check if lag with strong enough correlation has been found */
369
370    if( prevLag > 0 ) {
371        if( Fs_kHz == 12 ) {
372            prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
373        } else if( Fs_kHz == 16 ) {
374            prevLag = silk_RSHIFT( prevLag, 1 );
375        }
376        prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
377    } else {
378        prevLag_log2_Q7 = 0;
379    }
380    silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
381    /* Set up stage 2 codebook based on number of subframes */
382    if( nb_subfr == PE_MAX_NB_SUBFR ) {
383        cbk_size   = PE_NB_CBKS_STAGE2_EXT;
384        Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
385        if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
386            /* If input is 8 khz use a larger codebook here because it is last stage */
387            nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
388        } else {
389            nb_cbk_search = PE_NB_CBKS_STAGE2;
390        }
391    } else {
392        cbk_size       = PE_NB_CBKS_STAGE2_10MS;
393        Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
394        nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;
395    }
396
397    for( k = 0; k < length_d_srch; k++ ) {
398        d = d_srch[ k ];
399        for( j = 0; j < nb_cbk_search; j++ ) {
400            CC[ j ] = 0;
401            for( i = 0; i < nb_subfr; i++ ) {
402                opus_int d_subfr;
403                /* Try all codebooks */
404                d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
405                CC[ j ] = CC[ j ]
406                    + (opus_int32)matrix_ptr( C, i,
407                                              d_subfr - ( MIN_LAG_8KHZ - 2 ),
408                                              CSTRIDE_8KHZ );
409            }
410        }
411        /* Find best codebook */
412        CCmax_new = silk_int32_MIN;
413        CBimax_new = 0;
414        for( i = 0; i < nb_cbk_search; i++ ) {
415            if( CC[ i ] > CCmax_new ) {
416                CCmax_new = CC[ i ];
417                CBimax_new = i;
418            }
419        }
420
421        /* Bias towards shorter lags */
422        lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
423        silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
424        silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
425        CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
426
427        /* Bias towards previous lag */
428        silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
429        if( prevLag > 0 ) {
430            delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
431            silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
432            delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
433            prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
434            prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
435            CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
436        }
437
438        if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
439            CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */
440            silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ      /* Lag must be in range                             */
441         ) {
442            CCmax_b = CCmax_new_b;
443            CCmax   = CCmax_new;
444            lag     = d;
445            CBimax  = CBimax_new;
446        }
447    }
448
449    if( lag == -1 ) {
450        /* No suitable candidate found */
451        silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
452        *LTPCorr_Q15  = 0;
453        *lagIndex     = 0;
454        *contourIndex = 0;
455        RESTORE_STACK;
456        return 1;
457    }
458
459    /* Output normalized correlation */
460    *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
461    silk_assert( *LTPCorr_Q15 >= 0 );
462
463    if( Fs_kHz > 8 ) {
464        VARDECL( opus_int16, scratch_mem );
465        /***************************************************************************/
466        /* Scale input signal down to avoid correlations measures from overflowing */
467        /***************************************************************************/
468        /* find scaling as max scaling for each subframe */
469        silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
470        ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
471        if( shift > 0 ) {
472            /* Move signal to scratch mem because the input signal should be unchanged */
473            shift = silk_RSHIFT( shift, 1 );
474            for( i = 0; i < frame_length; i++ ) {
475                scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
476            }
477            input_frame_ptr = scratch_mem;
478        } else {
479            input_frame_ptr = frame;
480        }
481
482        /* Search in original signal */
483
484        CBimax_old = CBimax;
485        /* Compensate for decimation */
486        silk_assert( lag == silk_SAT16( lag ) );
487        if( Fs_kHz == 12 ) {
488            lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
489        } else if( Fs_kHz == 16 ) {
490            lag = silk_LSHIFT( lag, 1 );
491        } else {
492            lag = silk_SMULBB( lag, 3 );
493        }
494
495        lag = silk_LIMIT_int( lag, min_lag, max_lag );
496        start_lag = silk_max_int( lag - 2, min_lag );
497        end_lag   = silk_min_int( lag + 2, max_lag );
498        lag_new   = lag;                                    /* to avoid undefined lag */
499        CBimax    = 0;                                      /* to avoid undefined lag */
500
501        CCmax = silk_int32_MIN;
502        /* pitch lags according to second stage */
503        for( k = 0; k < nb_subfr; k++ ) {
504            pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
505        }
506
507        /* Set up codebook parameters according to complexity setting and frame length */
508        if( nb_subfr == PE_MAX_NB_SUBFR ) {
509            nb_cbk_search   = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
510            cbk_size        = PE_NB_CBKS_STAGE3_MAX;
511            Lag_CB_ptr      = &silk_CB_lags_stage3[ 0 ][ 0 ];
512        } else {
513            nb_cbk_search   = PE_NB_CBKS_STAGE3_10MS;
514            cbk_size        = PE_NB_CBKS_STAGE3_10MS;
515            Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
516        }
517
518        /* Calculate the correlations and energies needed in stage 3 */
519        ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
520        ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
521        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
522        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
523
524        lag_counter = 0;
525        silk_assert( lag == silk_SAT16( lag ) );
526        contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
527
528        target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
529        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
530        for( d = start_lag; d <= end_lag; d++ ) {
531            for( j = 0; j < nb_cbk_search; j++ ) {
532                cross_corr = 0;
533                energy     = energy_target;
534                for( k = 0; k < nb_subfr; k++ ) {
535                    cross_corr = silk_ADD32( cross_corr,
536                        matrix_ptr( cross_corr_st3, k, j,
537                                    nb_cbk_search )[ lag_counter ] );
538                    energy     = silk_ADD32( energy,
539                        matrix_ptr( energies_st3, k, j,
540                                    nb_cbk_search )[ lag_counter ] );
541                    silk_assert( energy >= 0 );
542                }
543                if( cross_corr > 0 ) {
544                    CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 );          /* Q13 */
545                    /* Reduce depending on flatness of contour */
546                    diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j );            /* Q15 */
547                    silk_assert( diff == silk_SAT16( diff ) );
548                    CCmax_new = silk_SMULWB( CCmax_new, diff );                         /* Q14 */
549                } else {
550                    CCmax_new = 0;
551                }
552
553                if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
554                    CCmax   = CCmax_new;
555                    lag_new = d;
556                    CBimax  = j;
557                }
558            }
559            lag_counter++;
560        }
561
562        for( k = 0; k < nb_subfr; k++ ) {
563            pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
564            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
565        }
566        *lagIndex = (opus_int16)( lag_new - min_lag);
567        *contourIndex = (opus_int8)CBimax;
568    } else {        /* Fs_kHz == 8 */
569        /* Save Lags */
570        for( k = 0; k < nb_subfr; k++ ) {
571            pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
572            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
573        }
574        *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
575        *contourIndex = (opus_int8)CBimax;
576    }
577    silk_assert( *lagIndex >= 0 );
578    /* return as voiced */
579    RESTORE_STACK;
580    return 0;
581}
582
583/***********************************************************************
584 * Calculates the correlations used in stage 3 search. In order to cover
585 * the whole lag codebook for all the searched offset lags (lag +- 2),
586 * the following correlations are needed in each sub frame:
587 *
588 * sf1: lag range [-8,...,7] total 16 correlations
589 * sf2: lag range [-4,...,4] total 9 correlations
590 * sf3: lag range [-3,....4] total 8 correltions
591 * sf4: lag range [-6,....8] total 15 correlations
592 *
593 * In total 48 correlations. The direct implementation computed in worst
594 * case 4*12*5 = 240 correlations, but more likely around 120.
595 ***********************************************************************/
596static void silk_P_Ana_calc_corr_st3(
597    silk_pe_stage3_vals cross_corr_st3[],              /* O 3 DIM correlation array */
598    const opus_int16  frame[],                         /* I vector to correlate         */
599    opus_int          start_lag,                       /* I lag offset to search around */
600    opus_int          sf_length,                       /* I length of a 5 ms subframe   */
601    opus_int          nb_subfr,                        /* I number of subframes         */
602    opus_int          complexity,                      /* I Complexity setting          */
603    int               arch                             /* I Run-time architecture       */
604)
605{
606    const opus_int16 *target_ptr;
607    opus_int   i, j, k, lag_counter, lag_low, lag_high;
608    opus_int   nb_cbk_search, delta, idx, cbk_size;
609    VARDECL( opus_int32, scratch_mem );
610    VARDECL( opus_int32, xcorr32 );
611    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
612    SAVE_STACK;
613
614    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
615    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
616
617    if( nb_subfr == PE_MAX_NB_SUBFR ) {
618        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
619        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
620        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
621        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
622    } else {
623        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
624        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
625        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
626        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
627        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
628    }
629    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
630    ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
631
632    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
633    for( k = 0; k < nb_subfr; k++ ) {
634        lag_counter = 0;
635
636        /* Calculate the correlations for each subframe */
637        lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
638        lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
639        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
640        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
641        for( j = lag_low; j <= lag_high; j++ ) {
642            silk_assert( lag_counter < SCRATCH_SIZE );
643            scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
644            lag_counter++;
645        }
646
647        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
648        for( i = 0; i < nb_cbk_search; i++ ) {
649            /* Fill out the 3 dim array that stores the correlations for */
650            /* each code_book vector for each start lag */
651            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
652            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
653                silk_assert( idx + j < SCRATCH_SIZE );
654                silk_assert( idx + j < lag_counter );
655                matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
656                    scratch_mem[ idx + j ];
657            }
658        }
659        target_ptr += sf_length;
660    }
661    RESTORE_STACK;
662}
663
664/********************************************************************/
665/* Calculate the energies for first two subframes. The energies are */
666/* calculated recursively.                                          */
667/********************************************************************/
668static void silk_P_Ana_calc_energy_st3(
669    silk_pe_stage3_vals energies_st3[],                 /* O 3 DIM energy array */
670    const opus_int16  frame[],                          /* I vector to calc energy in    */
671    opus_int          start_lag,                        /* I lag offset to search around */
672    opus_int          sf_length,                        /* I length of one 5 ms subframe */
673    opus_int          nb_subfr,                         /* I number of subframes         */
674    opus_int          complexity                        /* I Complexity setting          */
675)
676{
677    const opus_int16 *target_ptr, *basis_ptr;
678    opus_int32 energy;
679    opus_int   k, i, j, lag_counter;
680    opus_int   nb_cbk_search, delta, idx, cbk_size, lag_diff;
681    VARDECL( opus_int32, scratch_mem );
682    const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
683    SAVE_STACK;
684
685    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
686    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
687
688    if( nb_subfr == PE_MAX_NB_SUBFR ) {
689        Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
690        Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
691        nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
692        cbk_size      = PE_NB_CBKS_STAGE3_MAX;
693    } else {
694        silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
695        Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
696        Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
697        nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
698        cbk_size      = PE_NB_CBKS_STAGE3_10MS;
699    }
700    ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
701
702    target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
703    for( k = 0; k < nb_subfr; k++ ) {
704        lag_counter = 0;
705
706        /* Calculate the energy for first lag */
707        basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
708        energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
709        silk_assert( energy >= 0 );
710        scratch_mem[ lag_counter ] = energy;
711        lag_counter++;
712
713        lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
714        for( i = 1; i < lag_diff; i++ ) {
715            /* remove part outside new window */
716            energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
717            silk_assert( energy >= 0 );
718
719            /* add part that comes into window */
720            energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
721            silk_assert( energy >= 0 );
722            silk_assert( lag_counter < SCRATCH_SIZE );
723            scratch_mem[ lag_counter ] = energy;
724            lag_counter++;
725        }
726
727        delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
728        for( i = 0; i < nb_cbk_search; i++ ) {
729            /* Fill out the 3 dim array that stores the correlations for    */
730            /* each code_book vector for each start lag                     */
731            idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
732            for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
733                silk_assert( idx + j < SCRATCH_SIZE );
734                silk_assert( idx + j < lag_counter );
735                matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
736                    scratch_mem[ idx + j ];
737                silk_assert(
738                    matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
739            }
740        }
741        target_ptr += sf_length;
742    }
743    RESTORE_STACK;
744}
745