c4t64fx.c revision a87228e3e3f42d3676ced0d38ad9913d7bef9de6
1/*
2 ** Copyright 2003-2010, VisualOn, Inc.
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 **     http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17/***********************************************************************
18*      File: c4t64fx.c                                                 *
19*                                                                      *
20*      Description:Performs algebraic codebook search for higher modes *
21*                                                                      *
22************************************************************************/
23
24/************************************************************************
25* Function: ACELP_4t64_fx()                                             *
26*                                                                       *
27* 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook.                   *
28* 4 tracks x 16 positions per track = 64 samples.                       *
29*                                                                       *
30* 20 bits --> 4 pulses in a frame of 64 samples.                        *
31* 36 bits --> 8 pulses in a frame of 64 samples.                        *
32* 44 bits --> 10 pulses in a frame of 64 samples.                       *
33* 52 bits --> 12 pulses in a frame of 64 samples.                       *
34* 64 bits --> 16 pulses in a frame of 64 samples.                       *
35* 72 bits --> 18 pulses in a frame of 64 samples.                       *
36* 88 bits --> 24 pulses in a frame of 64 samples.                       *
37*                                                                       *
38* All pulses can have two (2) possible amplitudes: +1 or -1.            *
39* Each pulse can have sixteen (16) possible positions.                  *
40*************************************************************************/
41
42#include "typedef.h"
43#include "basic_op.h"
44#include "math_op.h"
45#include "acelp.h"
46#include "cnst.h"
47
48#include "q_pulse.h"
49
50static Word16 tipos[36] = {
51    0, 1, 2, 3,                            /* starting point &ipos[0], 1st iter */
52    1, 2, 3, 0,                            /* starting point &ipos[4], 2nd iter */
53    2, 3, 0, 1,                            /* starting point &ipos[8], 3rd iter */
54    3, 0, 1, 2,                            /* starting point &ipos[12], 4th iter */
55    0, 1, 2, 3,
56    1, 2, 3, 0,
57    2, 3, 0, 1,
58    3, 0, 1, 2,
59    0, 1, 2, 3};                           /* end point for 24 pulses &ipos[35], 4th iter */
60
61#define NB_PULSE_MAX  24
62
63#define L_SUBFR   64
64#define NB_TRACK  4
65#define STEP      4
66#define NB_POS    16
67#define MSIZE     256
68#define NB_MAX    8
69#define NPMAXPT   ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)
70
71/* Private functions */
72void cor_h_vec_012(
73        Word16 h[],                           /* (i) scaled impulse response                 */
74        Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
75        Word16 track,                         /* (i) track to use                            */
76        Word16 sign[],                        /* (i) sign vector                             */
77        Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
78        Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
79        Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
80        );
81
82void cor_h_vec_012_asm(
83        Word16 h[],                           /* (i) scaled impulse response                 */
84        Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
85        Word16 track,                         /* (i) track to use                            */
86        Word16 sign[],                        /* (i) sign vector                             */
87        Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
88        Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
89        Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
90        );
91
92void cor_h_vec_30(
93        Word16 h[],                           /* (i) scaled impulse response                 */
94        Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
95        Word16 track,                         /* (i) track to use                            */
96        Word16 sign[],                        /* (i) sign vector                             */
97        Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
98        Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
99        Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
100        );
101
102void search_ixiy(
103        Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
104        Word16 track_x,                       /* (i) track of pulse 1                   */
105        Word16 track_y,                       /* (i) track of pulse 2                   */
106        Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
107        Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
108        Word16 * ix,                          /* (o) position of pulse 1                */
109        Word16 * iy,                          /* (o) position of pulse 2                */
110        Word16 dn[],                          /* (i) corr. between target and h[]       */
111        Word16 dn2[],                         /* (i) vector of selected positions       */
112        Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
113        Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
114        Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
115        );
116
117
118void ACELP_4t64_fx(
119        Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
120        Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
121        Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
122        Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
123        Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
124        Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
125        Word16 ser_size,                      /* (i) : bit rate                                         */
126        Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
127        /* (o) : index (36): 9+9+9+9 = 36 bits.                   */
128        /* (o) : index (44): 13+9+13+9 = 44 bits.                 */
129        /* (o) : index (52): 13+13+13+13 = 52 bits.               */
130        /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
131        /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
132        /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
133        )
134{
135    Word32 i, j, k;
136    Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
137    Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
138    Word16 *p0, *p1, *p2, *p3, *psign;
139    Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
140    Word32 s, cor, L_tmp, L_index;
141    Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
142    Word16 ind[NPMAXPT * NB_TRACK];
143    Word16 codvec[NB_PULSE_MAX], nbpos[10];
144    Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
145    Word16 h_buf[4 * L_SUBFR];
146    Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
147    Word16 ipos[NB_PULSE_MAX];
148
149    switch (nbbits)
150    {
151        case 20:                               /* 20 bits, 4 pulses, 4 tracks */
152            nbiter = 4;                          /* 4x16x16=1024 loop */
153            alp = 8192;                          /* alp = 2.0 (Q12) */
154            nb_pulse = 4;
155            nbpos[0] = 4;
156            nbpos[1] = 8;
157            break;
158        case 36:                               /* 36 bits, 8 pulses, 4 tracks */
159            nbiter = 4;                          /* 4x20x16=1280 loop */
160            alp = 4096;                          /* alp = 1.0 (Q12) */
161            nb_pulse = 8;
162            nbpos[0] = 4;
163            nbpos[1] = 8;
164            nbpos[2] = 8;
165            break;
166        case 44:                               /* 44 bits, 10 pulses, 4 tracks */
167            nbiter = 4;                          /* 4x26x16=1664 loop */
168            alp = 4096;                          /* alp = 1.0 (Q12) */
169            nb_pulse = 10;
170            nbpos[0] = 4;
171            nbpos[1] = 6;
172            nbpos[2] = 8;
173            nbpos[3] = 8;
174            break;
175        case 52:                               /* 52 bits, 12 pulses, 4 tracks */
176            nbiter = 4;                          /* 4x26x16=1664 loop */
177            alp = 4096;                          /* alp = 1.0 (Q12) */
178            nb_pulse = 12;
179            nbpos[0] = 4;
180            nbpos[1] = 6;
181            nbpos[2] = 8;
182            nbpos[3] = 8;
183            break;
184        case 64:                               /* 64 bits, 16 pulses, 4 tracks */
185            nbiter = 3;                          /* 3x36x16=1728 loop */
186            alp = 3277;                          /* alp = 0.8 (Q12) */
187            nb_pulse = 16;
188            nbpos[0] = 4;
189            nbpos[1] = 4;
190            nbpos[2] = 6;
191            nbpos[3] = 6;
192            nbpos[4] = 8;
193            nbpos[5] = 8;
194            break;
195        case 72:                               /* 72 bits, 18 pulses, 4 tracks */
196            nbiter = 3;                          /* 3x35x16=1680 loop */
197            alp = 3072;                          /* alp = 0.75 (Q12) */
198            nb_pulse = 18;
199            nbpos[0] = 2;
200            nbpos[1] = 3;
201            nbpos[2] = 4;
202            nbpos[3] = 5;
203            nbpos[4] = 6;
204            nbpos[5] = 7;
205            nbpos[6] = 8;
206            break;
207        case 88:                               /* 88 bits, 24 pulses, 4 tracks */
208            if(ser_size > 462)
209                nbiter = 1;
210            else
211                nbiter = 2;                    /* 2x53x16=1696 loop */
212
213            alp = 2048;                          /* alp = 0.5 (Q12) */
214            nb_pulse = 24;
215            nbpos[0] = 2;
216            nbpos[1] = 2;
217            nbpos[2] = 3;
218            nbpos[3] = 4;
219            nbpos[4] = 5;
220            nbpos[5] = 6;
221            nbpos[6] = 7;
222            nbpos[7] = 8;
223            nbpos[8] = 8;
224            nbpos[9] = 8;
225            break;
226        default:
227            nbiter = 0;
228            alp = 0;
229            nb_pulse = 0;
230    }
231
232    for (i = 0; i < nb_pulse; i++)
233    {
234        codvec[i] = i;
235    }
236
237    /*----------------------------------------------------------------*
238     * Find sign for each pulse position.                             *
239     *----------------------------------------------------------------*/
240    /* calculate energy for normalization of cn[] and dn[] */
241    /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
242#ifdef ASM_OPT                  /* asm optimization branch */
243    s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
244#else
245    s = Dot_product12(cn, cn, L_SUBFR, &exp);
246#endif
247
248    Isqrt_n(&s, &exp);
249    s = L_shl(s, (exp + 5));
250    k_cn = extract_h(L_add(s, 0x8000));
251
252    /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
253#ifdef ASM_OPT                      /* asm optimization branch */
254    s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
255#else
256    s = Dot_product12(dn, dn, L_SUBFR, &exp);
257#endif
258
259    Isqrt_n(&s, &exp);
260    k_dn = voround(L_shl(s, (exp + 5 + 3)));    /* k_dn = 256..4096 */
261    k_dn = vo_mult_r(alp, k_dn);              /* alp in Q12 */
262
263    /* mix normalized cn[] and dn[] */
264    p0 = cn;
265    p1 = dn;
266    p2 = dn2;
267
268    for (i = 0; i < L_SUBFR/4; i++)
269    {
270        s = (k_cn* (*p0++))+(k_dn * (*p1++));
271        *p2++ = s >> 7;
272        s = (k_cn* (*p0++))+(k_dn * (*p1++));
273        *p2++ = s >> 7;
274        s = (k_cn* (*p0++))+(k_dn * (*p1++));
275        *p2++ = s >> 7;
276        s = (k_cn* (*p0++))+(k_dn * (*p1++));
277        *p2++ = s >> 7;
278    }
279
280    /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
281    for(i = 0; i < L_SUBFR; i++)
282    {
283        val = dn[i];
284        ps = dn2[i];
285        if (ps >= 0)
286        {
287            sign[i] = 32767;             /* sign = +1 (Q12) */
288            vec[i] = -32768;
289        } else
290        {
291            sign[i] = -32768;            /* sign = -1 (Q12) */
292            vec[i] = 32767;
293            dn[i] = -val;
294            dn2[i] = -ps;
295        }
296    }
297    /*----------------------------------------------------------------*
298     * Select NB_MAX position per track according to max of dn2[].    *
299     *----------------------------------------------------------------*/
300    pos = 0;
301    for (i = 0; i < NB_TRACK; i++)
302    {
303        for (k = 0; k < NB_MAX; k++)
304        {
305            ps = -1;
306            for (j = i; j < L_SUBFR; j += STEP)
307            {
308                if(dn2[j] > ps)
309                {
310                    ps = dn2[j];
311                    pos = j;
312                }
313            }
314            dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
315            if (k == 0)
316            {
317                pos_max[i] = pos;
318            }
319        }
320    }
321
322    /*--------------------------------------------------------------*
323     * Scale h[] to avoid overflow and to get maximum of precision  *
324     * on correlation.                                              *
325     *                                                              *
326     * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
327     *  ==> This allow addition of 16 pulses without saturation.    *
328     *                                                              *
329     * Energy worst case (on resonant impulse response),            *
330     * - energy of h[] is approximately MAX/16.                     *
331     * - During search, the energy is divided by 8 to avoid         *
332     *   overflow on "alp". (energy of h[] = MAX/128).              *
333     *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
334     *--------------------------------------------------------------*/
335
336    /* impulse response buffer for fast computation */
337
338    h = h_buf;
339    h_inv = h_buf + (2 * L_SUBFR);
340    L_tmp = 0;
341    for (i = 0; i < L_SUBFR; i++)
342    {
343        *h++ = 0;
344        *h_inv++ = 0;
345        L_tmp += (H[i] * H[i]) << 1;
346    }
347    /* scale h[] down (/2) when energy of h[] is high with many pulses used */
348    val = extract_h(L_tmp);
349    h_shift = 0;
350
351    if ((nb_pulse >= 12) && (val > 1024))
352    {
353        h_shift = 1;
354    }
355    p0 = H;
356    p1 = h;
357    p2 = h_inv;
358
359    for (i = 0; i < L_SUBFR/4; i++)
360    {
361        *p1 = *p0++ >> h_shift;
362        *p2++ = -(*p1++);
363        *p1 = *p0++ >> h_shift;
364        *p2++ = -(*p1++);
365        *p1 = *p0++ >> h_shift;
366        *p2++ = -(*p1++);
367        *p1 = *p0++ >> h_shift;
368        *p2++ = -(*p1++);
369    }
370
371    /*------------------------------------------------------------*
372     * Compute rrixix[][] needed for the codebook search.         *
373     * This algorithm compute impulse response energy of all      *
374     * positions (16) in each track (4).       Total = 4x16 = 64. *
375     *------------------------------------------------------------*/
376
377    /* storage order --> i3i3, i2i2, i1i1, i0i0 */
378
379    /* Init pointers to last position of rrixix[] */
380    p0 = &rrixix[0][NB_POS - 1];
381    p1 = &rrixix[1][NB_POS - 1];
382    p2 = &rrixix[2][NB_POS - 1];
383    p3 = &rrixix[3][NB_POS - 1];
384
385    ptr_h1 = h;
386    cor = 0x00008000L;                             /* for rounding */
387    for (i = 0; i < NB_POS; i++)
388    {
389        cor += vo_L_mult((*ptr_h1), (*ptr_h1));
390        ptr_h1++;
391        *p3-- = extract_h(cor);
392        cor += vo_L_mult((*ptr_h1), (*ptr_h1));
393        ptr_h1++;
394        *p2-- = extract_h(cor);
395        cor += vo_L_mult((*ptr_h1), (*ptr_h1));
396        ptr_h1++;
397        *p1-- = extract_h(cor);
398        cor += vo_L_mult((*ptr_h1), (*ptr_h1));
399        ptr_h1++;
400        *p0-- = extract_h(cor);
401    }
402
403    /*------------------------------------------------------------*
404     * Compute rrixiy[][] needed for the codebook search.         *
405     * This algorithm compute correlation between 2 pulses        *
406     * (2 impulses responses) in 4 possible adjacents tracks.     *
407     * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
408     *------------------------------------------------------------*/
409
410    /* storage order --> i2i3, i1i2, i0i1, i3i0 */
411
412    pos = MSIZE - 1;
413    ptr_hf = h + 1;
414
415    for (k = 0; k < NB_POS; k++)
416    {
417        p3 = &rrixiy[2][pos];
418        p2 = &rrixiy[1][pos];
419        p1 = &rrixiy[0][pos];
420        p0 = &rrixiy[3][pos - NB_POS];
421
422        cor = 0x00008000L;                   /* for rounding */
423        ptr_h1 = h;
424        ptr_h2 = ptr_hf;
425
426        for (i = k + 1; i < NB_POS; i++)
427        {
428            cor += vo_L_mult((*ptr_h1), (*ptr_h2));
429            ptr_h1++;
430            ptr_h2++;
431            *p3 = extract_h(cor);
432            cor += vo_L_mult((*ptr_h1), (*ptr_h2));
433            ptr_h1++;
434            ptr_h2++;
435            *p2 = extract_h(cor);
436            cor += vo_L_mult((*ptr_h1), (*ptr_h2));
437            ptr_h1++;
438            ptr_h2++;
439            *p1 = extract_h(cor);
440            cor += vo_L_mult((*ptr_h1), (*ptr_h2));
441            ptr_h1++;
442            ptr_h2++;
443            *p0 = extract_h(cor);
444
445            p3 -= (NB_POS + 1);
446            p2 -= (NB_POS + 1);
447            p1 -= (NB_POS + 1);
448            p0 -= (NB_POS + 1);
449        }
450        cor += vo_L_mult((*ptr_h1), (*ptr_h2));
451        ptr_h1++;
452        ptr_h2++;
453        *p3 = extract_h(cor);
454        cor += vo_L_mult((*ptr_h1), (*ptr_h2));
455        ptr_h1++;
456        ptr_h2++;
457        *p2 = extract_h(cor);
458        cor += vo_L_mult((*ptr_h1), (*ptr_h2));
459        ptr_h1++;
460        ptr_h2++;
461        *p1 = extract_h(cor);
462
463        pos -= NB_POS;
464        ptr_hf += STEP;
465    }
466
467    /* storage order --> i3i0, i2i3, i1i2, i0i1 */
468
469    pos = MSIZE - 1;
470    ptr_hf = h + 3;
471
472    for (k = 0; k < NB_POS; k++)
473    {
474        p3 = &rrixiy[3][pos];
475        p2 = &rrixiy[2][pos - 1];
476        p1 = &rrixiy[1][pos - 1];
477        p0 = &rrixiy[0][pos - 1];
478
479        cor = 0x00008000L;                              /* for rounding */
480        ptr_h1 = h;
481        ptr_h2 = ptr_hf;
482
483        for (i = k + 1; i < NB_POS; i++)
484        {
485            cor += vo_L_mult((*ptr_h1), (*ptr_h2));
486            ptr_h1++;
487            ptr_h2++;
488            *p3 = extract_h(cor);
489            cor += vo_L_mult((*ptr_h1), (*ptr_h2));
490            ptr_h1++;
491            ptr_h2++;
492            *p2 = extract_h(cor);
493            cor += vo_L_mult((*ptr_h1), (*ptr_h2));
494            ptr_h1++;
495            ptr_h2++;
496            *p1 = extract_h(cor);
497            cor += vo_L_mult((*ptr_h1), (*ptr_h2));
498            ptr_h1++;
499            ptr_h2++;
500            *p0 = extract_h(cor);
501
502            p3 -= (NB_POS + 1);
503            p2 -= (NB_POS + 1);
504            p1 -= (NB_POS + 1);
505            p0 -= (NB_POS + 1);
506        }
507        cor += vo_L_mult((*ptr_h1), (*ptr_h2));
508        ptr_h1++;
509        ptr_h2++;
510        *p3 = extract_h(cor);
511
512        pos--;
513        ptr_hf += STEP;
514    }
515
516    /*------------------------------------------------------------*
517     * Modification of rrixiy[][] to take signs into account.     *
518     *------------------------------------------------------------*/
519
520    p0 = &rrixiy[0][0];
521
522    for (k = 0; k < NB_TRACK; k++)
523    {
524        j_temp = (k + 1)&0x03;
525        for (i = k; i < L_SUBFR; i += STEP)
526        {
527            psign = sign;
528            if (psign[i] < 0)
529            {
530                psign = vec;
531            }
532            j = j_temp;
533            for (; j < L_SUBFR; j += STEP)
534            {
535                *p0 = vo_mult(*p0, psign[j]);
536                p0++;
537            }
538        }
539    }
540
541    /*-------------------------------------------------------------------*
542     *                       Deep first search                           *
543     *-------------------------------------------------------------------*/
544
545    psk = -1;
546    alpk = 1;
547
548    for (k = 0; k < nbiter; k++)
549    {
550        j_temp = k<<2;
551        for (i = 0; i < nb_pulse; i++)
552            ipos[i] = tipos[j_temp + i];
553
554        if(nbbits == 20)
555        {
556            pos = 0;
557            ps = 0;
558            alp = 0;
559            for (i = 0; i < L_SUBFR; i++)
560            {
561                vec[i] = 0;
562            }
563        } else if ((nbbits == 36) || (nbbits == 44))
564        {
565            /* first stage: fix 2 pulses */
566            pos = 2;
567
568            ix = ind[0] = pos_max[ipos[0]];
569            iy = ind[1] = pos_max[ipos[1]];
570            ps = dn[ix] + dn[iy];
571            i = ix >> 2;                /* ix / STEP */
572            j = iy >> 2;                /* iy / STEP */
573            s = rrixix[ipos[0]][i] << 13;
574            s += rrixix[ipos[1]][j] << 13;
575            i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
576            s += rrixiy[ipos[0]][i] << 14;
577            alp = (s + 0x8000) >> 16;
578            if (sign[ix] < 0)
579                p0 = h_inv - ix;
580            else
581                p0 = h - ix;
582            if (sign[iy] < 0)
583                p1 = h_inv - iy;
584            else
585                p1 = h - iy;
586
587            for (i = 0; i < L_SUBFR; i++)
588            {
589                vec[i] = (*p0++) + (*p1++);
590            }
591
592            if(nbbits == 44)
593            {
594                ipos[8] = 0;
595                ipos[9] = 1;
596            }
597        } else
598        {
599            /* first stage: fix 4 pulses */
600            pos = 4;
601
602            ix = ind[0] = pos_max[ipos[0]];
603            iy = ind[1] = pos_max[ipos[1]];
604            i = ind[2] = pos_max[ipos[2]];
605            j = ind[3] = pos_max[ipos[3]];
606            ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);
607
608            if (sign[ix] < 0)
609                p0 = h_inv - ix;
610            else
611                p0 = h - ix;
612
613            if (sign[iy] < 0)
614                p1 = h_inv - iy;
615            else
616                p1 = h - iy;
617
618            if (sign[i] < 0)
619                p2 = h_inv - i;
620            else
621                p2 = h - i;
622
623            if (sign[j] < 0)
624                p3 = h_inv - j;
625            else
626                p3 = h - j;
627
628            L_tmp = 0L;
629            for(i = 0; i < L_SUBFR; i++)
630            {
631                Word32 vecSq2;
632                vec[i]  = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
633                vecSq2 = (vec[i] * vec[i]) << 1;
634                if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) {
635                    L_tmp = INT_MAX;
636                } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) {
637                    L_tmp = INT_MIN;
638                } else {
639                    L_tmp  += vecSq2;
640                }
641            }
642
643            alp = ((L_tmp >> 3) + 0x8000) >> 16;
644
645            if(nbbits == 72)
646            {
647                ipos[16] = 0;
648                ipos[17] = 1;
649            }
650        }
651
652        /* other stages of 2 pulses */
653
654        for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
655        {
656            /*--------------------------------------------------*
657             * Calculate correlation of all possible positions  *
658             * of the next 2 pulses with previous fixed pulses. *
659             * Each pulse can have 16 possible positions.       *
660             *--------------------------------------------------*/
661            if(ipos[j] == 3)
662            {
663                cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
664            }
665            else
666            {
667#ifdef ASM_OPT                 /* asm optimization branch */
668                cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
669#else
670                cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
671#endif
672            }
673            /*--------------------------------------------------*
674             * Find best positions of 2 pulses.                 *
675             *--------------------------------------------------*/
676            search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
677                    &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);
678
679            ind[j] = ix;
680            ind[j + 1] = iy;
681
682            if (sign[ix] < 0)
683                p0 = h_inv - ix;
684            else
685                p0 = h - ix;
686            if (sign[iy] < 0)
687                p1 = h_inv - iy;
688            else
689                p1 = h - iy;
690
691            for (i = 0; i < L_SUBFR; i+=4)
692            {
693                vec[i]   += add1((*p0++), (*p1++));
694                vec[i+1] += add1((*p0++), (*p1++));
695                vec[i+2] += add1((*p0++), (*p1++));
696                vec[i+3] += add1((*p0++), (*p1++));
697            }
698        }
699        /* memorise the best codevector */
700        ps = vo_mult(ps, ps);
701        s = vo_L_msu(vo_L_mult(alpk, ps), psk, alp);
702        if (s > 0)
703        {
704            psk = ps;
705            alpk = alp;
706            for (i = 0; i < nb_pulse; i++)
707            {
708                codvec[i] = ind[i];
709            }
710            for (i = 0; i < L_SUBFR; i++)
711            {
712                y[i] = vec[i];
713            }
714        }
715    }
716    /*-------------------------------------------------------------------*
717     * Build the codeword, the filtered codeword and index of codevector.*
718     *-------------------------------------------------------------------*/
719    for (i = 0; i < NPMAXPT * NB_TRACK; i++)
720    {
721        ind[i] = -1;
722    }
723    for (i = 0; i < L_SUBFR; i++)
724    {
725        code[i] = 0;
726        y[i] = vo_shr_r(y[i], 3);               /* Q12 to Q9 */
727    }
728    val = (512 >> h_shift);               /* codeword in Q9 format */
729    for (k = 0; k < nb_pulse; k++)
730    {
731        i = codvec[k];                       /* read pulse position */
732        j = sign[i];                         /* read sign           */
733        index = i >> 2;                 /* index = pos of pulse (0..15) */
734        track = (Word16) (i & 0x03);         /* track = i % NB_TRACK (0..3)  */
735
736        if (j > 0)
737        {
738            code[i] += val;
739            codvec[k] += 128;
740        } else
741        {
742            code[i] -= val;
743            index += NB_POS;
744        }
745
746        i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));
747
748        while (ind[i] >= 0)
749        {
750            i += 1;
751        }
752        ind[i] = index;
753    }
754
755    k = 0;
756    /* Build index of codevector */
757    if(nbbits == 20)
758    {
759        for (track = 0; track < NB_TRACK; track++)
760        {
761            _index[track] = (Word16)(quant_1p_N1(ind[k], 4));
762            k += NPMAXPT;
763        }
764    } else if(nbbits == 36)
765    {
766        for (track = 0; track < NB_TRACK; track++)
767        {
768            _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
769            k += NPMAXPT;
770        }
771    } else if(nbbits == 44)
772    {
773        for (track = 0; track < NB_TRACK - 2; track++)
774        {
775            _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
776            k += NPMAXPT;
777        }
778        for (track = 2; track < NB_TRACK; track++)
779        {
780            _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
781            k += NPMAXPT;
782        }
783    } else if(nbbits == 52)
784    {
785        for (track = 0; track < NB_TRACK; track++)
786        {
787            _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
788            k += NPMAXPT;
789        }
790    } else if(nbbits == 64)
791    {
792        for (track = 0; track < NB_TRACK; track++)
793        {
794            L_index = quant_4p_4N(&ind[k], 4);
795            _index[track] = (Word16)((L_index >> 14) & 3);
796            _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
797            k += NPMAXPT;
798        }
799    } else if(nbbits == 72)
800    {
801        for (track = 0; track < NB_TRACK - 2; track++)
802        {
803            L_index = quant_5p_5N(&ind[k], 4);
804            _index[track] = (Word16)((L_index >> 10) & 0x03FF);
805            _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
806            k += NPMAXPT;
807        }
808        for (track = 2; track < NB_TRACK; track++)
809        {
810            L_index = quant_4p_4N(&ind[k], 4);
811            _index[track] = (Word16)((L_index >> 14) & 3);
812            _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
813            k += NPMAXPT;
814        }
815    } else if(nbbits == 88)
816    {
817        for (track = 0; track < NB_TRACK; track++)
818        {
819            L_index = quant_6p_6N_2(&ind[k], 4);
820            _index[track] = (Word16)((L_index >> 11) & 0x07FF);
821            _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
822            k += NPMAXPT;
823        }
824    }
825    return;
826}
827
828
/*-------------------------------------------------------------------*
 * Function  cor_h_vec_30()                                          *
 * ~~~~~~~~~~~~~~~~~~~~~~~~                                          *
 * Compute correlations of h[] with vec[] for the specified track.   *
 *-------------------------------------------------------------------*/
834void cor_h_vec_30(
835        Word16 h[],                           /* (i) scaled impulse response                 */
836        Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
837        Word16 track,                         /* (i) track to use                            */
838        Word16 sign[],                        /* (i) sign vector                             */
839        Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
840        Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
841        Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
842        )
843{
844    Word32 i, j, pos, corr;
845    Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
846    Word32 L_sum1,L_sum2;
847    cor_x = cor_1;
848    cor_y = cor_2;
849    p0 = rrixix[track];
850    p3 = rrixix[0];
851    pos = track;
852
853    for (i = 0; i < NB_POS; i+=2)
854    {
855        L_sum1 = L_sum2 = 0L;
856        p1 = h;
857        p2 = &vec[pos];
858        for (j=pos;j < L_SUBFR; j++)
859        {
860            L_sum1 += *p1 * *p2;
861            p2-=3;
862            L_sum2 += *p1++ * *p2;
863            p2+=4;
864        }
865        p2-=3;
866        L_sum2 += *p1++ * *p2++;
867        L_sum2 += *p1++ * *p2++;
868        L_sum2 += *p1++ * *p2++;
869
870        L_sum1 = (L_sum1 << 2);
871        L_sum2 = (L_sum2 << 2);
872
873        corr = vo_round(L_sum1);
874        *cor_x++ = vo_mult(corr, sign[pos]) + (*p0++);
875        corr = vo_round(L_sum2);
876        *cor_y++ = vo_mult(corr, sign[pos-3]) + (*p3++);
877        pos += STEP;
878
879        L_sum1 = L_sum2 = 0L;
880        p1 = h;
881        p2 = &vec[pos];
882        for (j=pos;j < L_SUBFR; j++)
883        {
884            L_sum1 += *p1 * *p2;
885            p2-=3;
886            L_sum2 += *p1++ * *p2;
887            p2+=4;
888        }
889        p2-=3;
890        L_sum2 += *p1++ * *p2++;
891        L_sum2 += *p1++ * *p2++;
892        L_sum2 += *p1++ * *p2++;
893
894        L_sum1 = (L_sum1 << 2);
895        L_sum2 = (L_sum2 << 2);
896
897        corr = vo_round(L_sum1);
898        *cor_x++ = vo_mult(corr, sign[pos]) + (*p0++);
899        corr = vo_round(L_sum2);
900        *cor_y++ = vo_mult(corr, sign[pos-3]) + (*p3++);
901        pos += STEP;
902    }
903    return;
904}
905
906void cor_h_vec_012(
907        Word16 h[],                           /* (i) scaled impulse response                 */
908        Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
909        Word16 track,                         /* (i) track to use                            */
910        Word16 sign[],                        /* (i) sign vector                             */
911        Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
912        Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
913        Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
914        )
915{
916    Word32 i, j, pos, corr;
917    Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
918    Word32 L_sum1,L_sum2;
919    cor_x = cor_1;
920    cor_y = cor_2;
921    p0 = rrixix[track];
922    p3 = rrixix[track+1];
923    pos = track;
924
925    for (i = 0; i < NB_POS; i+=2)
926    {
927        L_sum1 = L_sum2 = 0L;
928        p1 = h;
929        p2 = &vec[pos];
930        for (j=62-pos ;j >= 0; j--)
931        {
932            L_sum1 += *p1 * *p2++;
933            L_sum2 += *p1++ * *p2;
934        }
935        L_sum1 += *p1 * *p2;
936        L_sum1 = (L_sum1 << 2);
937        L_sum2 = (L_sum2 << 2);
938
939        corr = (L_sum1 + 0x8000) >> 16;
940        cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++);
941        corr = (L_sum2 + 0x8000) >> 16;
942        cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++);
943        pos += STEP;
944
945        L_sum1 = L_sum2 = 0L;
946        p1 = h;
947        p2 = &vec[pos];
948        for (j= 62-pos;j >= 0; j--)
949        {
950            L_sum1 += *p1 * *p2++;
951            L_sum2 += *p1++ * *p2;
952        }
953        L_sum1 += *p1 * *p2;
954        L_sum1 = (L_sum1 << 2);
955        L_sum2 = (L_sum2 << 2);
956
957        corr = (L_sum1 + 0x8000) >> 16;
958        cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++);
959        corr = (L_sum2 + 0x8000) >> 16;
960        cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++);
961        pos += STEP;
962    }
963    return;
964}
965
966/*-------------------------------------------------------------------*
967 * Function  search_ixiy()                                           *
968 * ~~~~~~~~~~~~~~~~~~~~~~~                                           *
969 * Find the best positions of 2 pulses in a subframe.                *
970 *-------------------------------------------------------------------*/
971
972void search_ixiy(
973        Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
974        Word16 track_x,                       /* (i) track of pulse 1                   */
975        Word16 track_y,                       /* (i) track of pulse 2                   */
976        Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
977        Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
978        Word16 * ix,                          /* (o) position of pulse 1                */
979        Word16 * iy,                          /* (o) position of pulse 2                */
980        Word16 dn[],                          /* (i) corr. between target and h[]       */
981        Word16 dn2[],                         /* (i) vector of selected positions       */
982        Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
983        Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
984        Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
985        )
986{
987    Word32 x, y, pos, thres_ix;
988    Word16 ps1, ps2, sq, sqk;
989    Word16 alp_16, alpk;
990    Word16 *p0, *p1, *p2;
991    Word32 s, alp0, alp1, alp2;
992
993    p0 = cor_x;
994    p1 = cor_y;
995    p2 = rrixiy[track_x];
996
997    thres_ix = nb_pos_ix - NB_MAX;
998
999    alp0 = L_deposit_h(*alp);
1000    alp0 = (alp0 + 0x00008000L);       /* for rounding */
1001
1002    sqk = -1;
1003    alpk = 1;
1004
1005    for (x = track_x; x < L_SUBFR; x += STEP)
1006    {
1007        ps1 = *ps + dn[x];
1008        alp1 = L_add(alp0, ((*p0++)<<13));
1009
1010        if (dn2[x] < thres_ix)
1011        {
1012            pos = -1;
1013            for (y = track_y; y < L_SUBFR; y += STEP)
1014            {
1015                ps2 = add1(ps1, dn[y]);
1016
1017                alp2 = L_add(alp1, ((*p1++)<<13));
1018                alp2 = L_add(alp2, ((*p2++)<<14));
1019                alp_16 = extract_h(alp2);
1020                sq = vo_mult(ps2, ps2);
1021                s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16));
1022
1023                if (s > 0)
1024                {
1025                    sqk = sq;
1026                    alpk = alp_16;
1027                    pos = y;
1028                }
1029            }
1030            p1 -= NB_POS;
1031
1032            if (pos >= 0)
1033            {
1034                *ix = x;
1035                *iy = pos;
1036            }
1037        } else
1038        {
1039            p2 += NB_POS;
1040        }
1041    }
1042
1043    *ps = add1(*ps, add1(dn[*ix], dn[*iy]));
1044    *alp = alpk;
1045
1046    return;
1047}
1048
1049
1050
1051
1052