c4t64fx.c revision 41050cdb033641ddf26831d9272c0930f7b40a2d
1/*
2 ** Copyright 2003-2010, VisualOn, Inc.
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 **     http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17/***********************************************************************
18*      File: c4t64fx.c                                                 *
19*                                                                      *
20*	   Description:Performs algebraic codebook search for higher modes *
21*                                                                      *
22************************************************************************/
23
24/************************************************************************
25* Function: ACELP_4t64_fx()                                             *
26*                                                                       *
27* 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook.                   *
28* 4 tracks x 16 positions per track = 64 samples.                       *
29*                                                                       *
30* 20 bits --> 4 pulses in a frame of 64 samples.                        *
31* 36 bits --> 8 pulses in a frame of 64 samples.                        *
32* 44 bits --> 10 pulses in a frame of 64 samples.                       *
33* 52 bits --> 12 pulses in a frame of 64 samples.                       *
34* 64 bits --> 16 pulses in a frame of 64 samples.                       *
35* 72 bits --> 18 pulses in a frame of 64 samples.                       *
36* 88 bits --> 24 pulses in a frame of 64 samples.                       *
37*                                                                       *
38* All pulses can have two (2) possible amplitudes: +1 or -1.            *
39* Each pulse can have sixteen (16) possible positions.                  *
40*************************************************************************/
41
42#include "typedef.h"
43#include "basic_op.h"
44#include "math_op.h"
45#include "acelp.h"
46#include "cnst.h"
47
48#include "q_pulse.h"
49
50static Word16 tipos[36] = {
51	0, 1, 2, 3,                            /* starting point &ipos[0], 1st iter */
52	1, 2, 3, 0,                            /* starting point &ipos[4], 2nd iter */
53	2, 3, 0, 1,                            /* starting point &ipos[8], 3rd iter */
54	3, 0, 1, 2,                            /* starting point &ipos[12], 4th iter */
55	0, 1, 2, 3,
56	1, 2, 3, 0,
57	2, 3, 0, 1,
58	3, 0, 1, 2,
59	0, 1, 2, 3};                           /* end point for 24 pulses &ipos[35], 4th iter */
60
/* Largest pulse count of any supported codebook (the 88-bit mode uses 24). */
#define NB_PULSE_MAX  24

/* Subframe length in samples: NB_TRACK tracks x NB_POS positions. */
#define L_SUBFR   64
/* Number of interleaved pulse tracks. */
#define NB_TRACK  4
/* Distance in samples between two consecutive positions of one track. */
#define STEP      4
/* Number of pulse positions per track. */
#define NB_POS    16
/* Size of one track-pair correlation matrix (NB_POS x NB_POS). */
#define MSIZE     256
/* Number of best positions pre-selected (ranked) on each track. */
#define NB_MAX    8
/* Maximum number of pulses per track, rounded up. */
#define NPMAXPT   ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)
70
/* Private functions */

/*
 * Correlations of h[] with vec[] for the positions of `track` (results in
 * cor_1[]) and of the following track (results in cor_2[]); used for
 * tracks 0, 1 and 2.
 */
void cor_h_vec_012(
		Word16 h[],                           /* (i) scaled impulse response                 */
		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
		Word16 track,                         /* (i) track to use                            */
		Word16 sign[],                        /* (i) sign vector                             */
		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
		);

/*
 * Same interface as cor_h_vec_012(); called instead of it when ASM_OPT is
 * defined. The definition is not part of this file.
 */
void cor_h_vec_012_asm(
		Word16 h[],                           /* (i) scaled impulse response                 */
		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
		Word16 track,                         /* (i) track to use                            */
		Word16 sign[],                        /* (i) sign vector                             */
		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
		);

/*
 * Correlations of h[] with vec[] for the positions of track 3 (results in
 * cor_1[]) and of track 0 (results in cor_2[]); called when the first pulse
 * of the pair lies on track 3.
 */
void cor_h_vec_30(
		Word16 h[],                           /* (i) scaled impulse response                 */
		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
		Word16 track,                         /* (i) track to use                            */
		Word16 sign[],                        /* (i) sign vector                             */
		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
		);

/*
 * Finds the best positions of two pulses (one on track_x, one on track_y)
 * and updates the running correlation *ps and energy *alp accordingly.
 */
void search_ixiy(
		Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
		Word16 track_x,                       /* (i) track of pulse 1                   */
		Word16 track_y,                       /* (i) track of pulse 2                   */
		Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
		Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
		Word16 * ix,                          /* (o) position of pulse 1                */
		Word16 * iy,                          /* (o) position of pulse 2                */
		Word16 dn[],                          /* (i) corr. between target and h[]       */
		Word16 dn2[],                         /* (i) vector of selected positions       */
		Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
		Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
		Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
		);
116
117
118void ACELP_4t64_fx(
119		Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
120		Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
121		Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
122		Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
123		Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
124		Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
125		Word16 ser_size,                      /* (i) : bit rate                                         */
126		Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
127		/* (o) : index (36): 9+9+9+9 = 36 bits.                   */
128		/* (o) : index (44): 13+9+13+9 = 44 bits.                 */
129		/* (o) : index (52): 13+13+13+13 = 52 bits.               */
130		/* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
131		/* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
132		/* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
133		)
134{
135	Word32 i, j, k;
136	Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
137	Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
138	Word16 *p0, *p1, *p2, *p3, *psign;
139	Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
140	Word32 s, cor, L_tmp, L_index;
141	Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
142	Word16 ind[NPMAXPT * NB_TRACK];
143	Word16 codvec[NB_PULSE_MAX], nbpos[10];
144	Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
145	Word16 h_buf[4 * L_SUBFR];
146	Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
147	Word16 ipos[NB_PULSE_MAX];
148
149	switch (nbbits)
150	{
151		case 20:                               /* 20 bits, 4 pulses, 4 tracks */
152			nbiter = 4;                          /* 4x16x16=1024 loop */
153			alp = 8192;                          /* alp = 2.0 (Q12) */
154			nb_pulse = 4;
155			nbpos[0] = 4;
156			nbpos[1] = 8;
157			break;
158		case 36:                               /* 36 bits, 8 pulses, 4 tracks */
159			nbiter = 4;                          /* 4x20x16=1280 loop */
160			alp = 4096;                          /* alp = 1.0 (Q12) */
161			nb_pulse = 8;
162			nbpos[0] = 4;
163			nbpos[1] = 8;
164			nbpos[2] = 8;
165			break;
166		case 44:                               /* 44 bits, 10 pulses, 4 tracks */
167			nbiter = 4;                          /* 4x26x16=1664 loop */
168			alp = 4096;                          /* alp = 1.0 (Q12) */
169			nb_pulse = 10;
170			nbpos[0] = 4;
171			nbpos[1] = 6;
172			nbpos[2] = 8;
173			nbpos[3] = 8;
174			break;
175		case 52:                               /* 52 bits, 12 pulses, 4 tracks */
176			nbiter = 4;                          /* 4x26x16=1664 loop */
177			alp = 4096;                          /* alp = 1.0 (Q12) */
178			nb_pulse = 12;
179			nbpos[0] = 4;
180			nbpos[1] = 6;
181			nbpos[2] = 8;
182			nbpos[3] = 8;
183			break;
184		case 64:                               /* 64 bits, 16 pulses, 4 tracks */
185			nbiter = 3;                          /* 3x36x16=1728 loop */
186			alp = 3277;                          /* alp = 0.8 (Q12) */
187			nb_pulse = 16;
188			nbpos[0] = 4;
189			nbpos[1] = 4;
190			nbpos[2] = 6;
191			nbpos[3] = 6;
192			nbpos[4] = 8;
193			nbpos[5] = 8;
194			break;
195		case 72:                               /* 72 bits, 18 pulses, 4 tracks */
196			nbiter = 3;                          /* 3x35x16=1680 loop */
197			alp = 3072;                          /* alp = 0.75 (Q12) */
198			nb_pulse = 18;
199			nbpos[0] = 2;
200			nbpos[1] = 3;
201			nbpos[2] = 4;
202			nbpos[3] = 5;
203			nbpos[4] = 6;
204			nbpos[5] = 7;
205			nbpos[6] = 8;
206			break;
207		case 88:                               /* 88 bits, 24 pulses, 4 tracks */
208			if(ser_size > 462)
209				nbiter = 1;
210			else
211				nbiter = 2;                    /* 2x53x16=1696 loop */
212
213			alp = 2048;                          /* alp = 0.5 (Q12) */
214			nb_pulse = 24;
215			nbpos[0] = 2;
216			nbpos[1] = 2;
217			nbpos[2] = 3;
218			nbpos[3] = 4;
219			nbpos[4] = 5;
220			nbpos[5] = 6;
221			nbpos[6] = 7;
222			nbpos[7] = 8;
223			nbpos[8] = 8;
224			nbpos[9] = 8;
225			break;
226		default:
227			nbiter = 0;
228			alp = 0;
229			nb_pulse = 0;
230	}
231
232	for (i = 0; i < nb_pulse; i++)
233	{
234		codvec[i] = i;
235	}
236
237	/*----------------------------------------------------------------*
238	 * Find sign for each pulse position.                             *
239	 *----------------------------------------------------------------*/
240	/* calculate energy for normalization of cn[] and dn[] */
241	/* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
242#ifdef ASM_OPT                  /* asm optimization branch */
243	s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
244#else
245	s = Dot_product12(cn, cn, L_SUBFR, &exp);
246#endif
247
248	Isqrt_n(&s, &exp);
249	s = L_shl(s, (exp + 5));
250	k_cn = extract_h(L_add(s, 0x8000));
251
252	/* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
253#ifdef ASM_OPT                      /* asm optimization branch */
254	s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
255#else
256	s = Dot_product12(dn, dn, L_SUBFR, &exp);
257#endif
258
259	Isqrt_n(&s, &exp);
260	k_dn = (L_shl(s, (exp + 5 + 3)) + 0x8000) >> 16;    /* k_dn = 256..4096 */
261	k_dn = vo_mult_r(alp, k_dn);              /* alp in Q12 */
262
263	/* mix normalized cn[] and dn[] */
264	p0 = cn;
265	p1 = dn;
266	p2 = dn2;
267
268	for (i = 0; i < L_SUBFR/4; i++)
269	{
270		s = (k_cn* (*p0++))+(k_dn * (*p1++));
271		*p2++ = s >> 7;
272		s = (k_cn* (*p0++))+(k_dn * (*p1++));
273		*p2++ = s >> 7;
274		s = (k_cn* (*p0++))+(k_dn * (*p1++));
275		*p2++ = s >> 7;
276		s = (k_cn* (*p0++))+(k_dn * (*p1++));
277		*p2++ = s >> 7;
278	}
279
280	/* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
281	for(i = 0; i < L_SUBFR; i++)
282	{
283		val = dn[i];
284		ps = dn2[i];
285		if (ps >= 0)
286		{
287			sign[i] = 32767;             /* sign = +1 (Q12) */
288			vec[i] = -32768;
289		} else
290		{
291			sign[i] = -32768;            /* sign = -1 (Q12) */
292			vec[i] = 32767;
293			dn[i] = -val;
294			dn2[i] = -ps;
295		}
296	}
297	/*----------------------------------------------------------------*
298	 * Select NB_MAX position per track according to max of dn2[].    *
299	 *----------------------------------------------------------------*/
300	pos = 0;
301	for (i = 0; i < NB_TRACK; i++)
302	{
303		for (k = 0; k < NB_MAX; k++)
304		{
305			ps = -1;
306			for (j = i; j < L_SUBFR; j += STEP)
307			{
308				if(dn2[j] > ps)
309				{
310					ps = dn2[j];
311					pos = j;
312				}
313			}
314			dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
315			if (k == 0)
316			{
317				pos_max[i] = pos;
318			}
319		}
320	}
321
322	/*--------------------------------------------------------------*
323	 * Scale h[] to avoid overflow and to get maximum of precision  *
324	 * on correlation.                                              *
325	 *                                                              *
326	 * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
327	 *  ==> This allow addition of 16 pulses without saturation.    *
328	 *                                                              *
329	 * Energy worst case (on resonant impulse response),            *
330	 * - energy of h[] is approximately MAX/16.                     *
331	 * - During search, the energy is divided by 8 to avoid         *
332	 *   overflow on "alp". (energy of h[] = MAX/128).              *
333	 *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
334	 *--------------------------------------------------------------*/
335
336	/* impulse response buffer for fast computation */
337
338	h = h_buf;
339	h_inv = h_buf + (2 * L_SUBFR);
340	L_tmp = 0;
341	for (i = 0; i < L_SUBFR; i++)
342	{
343		*h++ = 0;
344		*h_inv++ = 0;
345		L_tmp += (H[i] * H[i]) << 1;
346	}
347	/* scale h[] down (/2) when energy of h[] is high with many pulses used */
348	val = extract_h(L_tmp);
349	h_shift = 0;
350
351	if ((nb_pulse >= 12) && (val > 1024))
352	{
353		h_shift = 1;
354	}
355	p0 = H;
356	p1 = h;
357	p2 = h_inv;
358
359	for (i = 0; i < L_SUBFR/4; i++)
360	{
361		*p1 = *p0++ >> h_shift;
362		*p2++ = -(*p1++);
363		*p1 = *p0++ >> h_shift;
364		*p2++ = -(*p1++);
365		*p1 = *p0++ >> h_shift;
366		*p2++ = -(*p1++);
367		*p1 = *p0++ >> h_shift;
368		*p2++ = -(*p1++);
369	}
370
371	/*------------------------------------------------------------*
372	 * Compute rrixix[][] needed for the codebook search.         *
373	 * This algorithm compute impulse response energy of all      *
374	 * positions (16) in each track (4).       Total = 4x16 = 64. *
375	 *------------------------------------------------------------*/
376
377	/* storage order --> i3i3, i2i2, i1i1, i0i0 */
378
379	/* Init pointers to last position of rrixix[] */
380	p0 = &rrixix[0][NB_POS - 1];
381	p1 = &rrixix[1][NB_POS - 1];
382	p2 = &rrixix[2][NB_POS - 1];
383	p3 = &rrixix[3][NB_POS - 1];
384
385	ptr_h1 = h;
386	cor = 0x00008000L;                             /* for rounding */
387	for (i = 0; i < NB_POS; i++)
388	{
389		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
390		ptr_h1++;
391		*p3-- = extract_h(cor);
392		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
393		ptr_h1++;
394		*p2-- = extract_h(cor);
395		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
396		ptr_h1++;
397		*p1-- = extract_h(cor);
398		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
399		ptr_h1++;
400		*p0-- = extract_h(cor);
401	}
402
403	/*------------------------------------------------------------*
404	 * Compute rrixiy[][] needed for the codebook search.         *
405	 * This algorithm compute correlation between 2 pulses        *
406	 * (2 impulses responses) in 4 possible adjacents tracks.     *
407	 * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
408	 *------------------------------------------------------------*/
409
410	/* storage order --> i2i3, i1i2, i0i1, i3i0 */
411
412	pos = MSIZE - 1;
413	ptr_hf = h + 1;
414
415	for (k = 0; k < NB_POS; k++)
416	{
417		p3 = &rrixiy[2][pos];
418		p2 = &rrixiy[1][pos];
419		p1 = &rrixiy[0][pos];
420		p0 = &rrixiy[3][pos - NB_POS];
421
422		cor = 0x00008000L;                   /* for rounding */
423		ptr_h1 = h;
424		ptr_h2 = ptr_hf;
425
426		for (i = k + 1; i < NB_POS; i++)
427		{
428			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
429			ptr_h1++;
430			ptr_h2++;
431			*p3 = extract_h(cor);
432			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
433			ptr_h1++;
434			ptr_h2++;
435			*p2 = extract_h(cor);
436			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
437			ptr_h1++;
438			ptr_h2++;
439			*p1 = extract_h(cor);
440			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
441			ptr_h1++;
442			ptr_h2++;
443			*p0 = extract_h(cor);
444
445			p3 -= (NB_POS + 1);
446			p2 -= (NB_POS + 1);
447			p1 -= (NB_POS + 1);
448			p0 -= (NB_POS + 1);
449		}
450		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
451		ptr_h1++;
452		ptr_h2++;
453		*p3 = extract_h(cor);
454		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
455		ptr_h1++;
456		ptr_h2++;
457		*p2 = extract_h(cor);
458		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
459		ptr_h1++;
460		ptr_h2++;
461		*p1 = extract_h(cor);
462
463		pos -= NB_POS;
464		ptr_hf += STEP;
465	}
466
467	/* storage order --> i3i0, i2i3, i1i2, i0i1 */
468
469	pos = MSIZE - 1;
470	ptr_hf = h + 3;
471
472	for (k = 0; k < NB_POS; k++)
473	{
474		p3 = &rrixiy[3][pos];
475		p2 = &rrixiy[2][pos - 1];
476		p1 = &rrixiy[1][pos - 1];
477		p0 = &rrixiy[0][pos - 1];
478
479		cor = 0x00008000L;								/* for rounding */
480		ptr_h1 = h;
481		ptr_h2 = ptr_hf;
482
483		for (i = k + 1; i < NB_POS; i++)
484		{
485			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
486			ptr_h1++;
487			ptr_h2++;
488			*p3 = extract_h(cor);
489			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
490			ptr_h1++;
491			ptr_h2++;
492			*p2 = extract_h(cor);
493			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
494			ptr_h1++;
495			ptr_h2++;
496			*p1 = extract_h(cor);
497			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
498			ptr_h1++;
499			ptr_h2++;
500			*p0 = extract_h(cor);
501
502			p3 -= (NB_POS + 1);
503			p2 -= (NB_POS + 1);
504			p1 -= (NB_POS + 1);
505			p0 -= (NB_POS + 1);
506		}
507		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
508		ptr_h1++;
509		ptr_h2++;
510		*p3 = extract_h(cor);
511
512		pos--;
513		ptr_hf += STEP;
514	}
515
516	/*------------------------------------------------------------*
517	 * Modification of rrixiy[][] to take signs into account.     *
518	 *------------------------------------------------------------*/
519
520	p0 = &rrixiy[0][0];
521
522	for (k = 0; k < NB_TRACK; k++)
523	{
524		j_temp = (k + 1)&0x03;
525		for (i = k; i < L_SUBFR; i += STEP)
526		{
527			psign = sign;
528			if (psign[i] < 0)
529			{
530				psign = vec;
531			}
532			j = j_temp;
533			for (; j < L_SUBFR; j += STEP)
534			{
535				*p0 = vo_mult(*p0, psign[j]);
536				p0++;
537			}
538		}
539	}
540
541	/*-------------------------------------------------------------------*
542	 *                       Deep first search                           *
543	 *-------------------------------------------------------------------*/
544
545	psk = -1;
546	alpk = 1;
547
548	for (k = 0; k < nbiter; k++)
549	{
550		j_temp = k<<2;
551		for (i = 0; i < nb_pulse; i++)
552			ipos[i] = tipos[j_temp + i];
553
554		if(nbbits == 20)
555		{
556			pos = 0;
557			ps = 0;
558			alp = 0;
559			for (i = 0; i < L_SUBFR; i++)
560			{
561				vec[i] = 0;
562			}
563		} else if ((nbbits == 36) || (nbbits == 44))
564		{
565			/* first stage: fix 2 pulses */
566			pos = 2;
567
568			ix = ind[0] = pos_max[ipos[0]];
569			iy = ind[1] = pos_max[ipos[1]];
570			ps = dn[ix] + dn[iy];
571			i = ix >> 2;                /* ix / STEP */
572			j = iy >> 2;                /* iy / STEP */
573			s = rrixix[ipos[0]][i] << 13;
574			s += rrixix[ipos[1]][j] << 13;
575			i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
576			s += rrixiy[ipos[0]][i] << 14;
577			alp = (s + 0x8000) >> 16;
578			if (sign[ix] < 0)
579				p0 = h_inv - ix;
580			else
581				p0 = h - ix;
582			if (sign[iy] < 0)
583				p1 = h_inv - iy;
584			else
585				p1 = h - iy;
586
587			for (i = 0; i < L_SUBFR; i++)
588			{
589				vec[i] = (*p0++) + (*p1++);
590			}
591
592			if(nbbits == 44)
593			{
594				ipos[8] = 0;
595				ipos[9] = 1;
596			}
597		} else
598		{
599			/* first stage: fix 4 pulses */
600			pos = 4;
601
602			ix = ind[0] = pos_max[ipos[0]];
603			iy = ind[1] = pos_max[ipos[1]];
604			i = ind[2] = pos_max[ipos[2]];
605			j = ind[3] = pos_max[ipos[3]];
606			ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);
607
608			if (sign[ix] < 0)
609				p0 = h_inv - ix;
610			else
611				p0 = h - ix;
612
613			if (sign[iy] < 0)
614				p1 = h_inv - iy;
615			else
616				p1 = h - iy;
617
618			if (sign[i] < 0)
619				p2 = h_inv - i;
620			else
621				p2 = h - i;
622
623			if (sign[j] < 0)
624				p3 = h_inv - j;
625			else
626				p3 = h - j;
627
628			L_tmp = 0L;
629			for(i = 0; i < L_SUBFR; i++)
630			{
631				vec[i]  = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
632				L_tmp  += (vec[i] * vec[i]) << 1;
633			}
634
635			alp = ((L_tmp >> 3) + 0x8000) >> 16;
636
637			if(nbbits == 72)
638			{
639				ipos[16] = 0;
640				ipos[17] = 1;
641			}
642		}
643
644		/* other stages of 2 pulses */
645
646		for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
647		{
648			/*--------------------------------------------------*
649			 * Calculate correlation of all possible positions  *
650			 * of the next 2 pulses with previous fixed pulses. *
651			 * Each pulse can have 16 possible positions.       *
652			 *--------------------------------------------------*/
653			if(ipos[j] == 3)
654			{
655				cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
656			}
657			else
658			{
659#ifdef ASM_OPT                 /* asm optimization branch */
660				cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
661#else
662				cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
663#endif
664			}
665			/*--------------------------------------------------*
666			 * Find best positions of 2 pulses.                 *
667			 *--------------------------------------------------*/
668			search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
669					&ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);
670
671			ind[j] = ix;
672			ind[j + 1] = iy;
673
674			if (sign[ix] < 0)
675				p0 = h_inv - ix;
676			else
677				p0 = h - ix;
678			if (sign[iy] < 0)
679				p1 = h_inv - iy;
680			else
681				p1 = h - iy;
682
683			for (i = 0; i < L_SUBFR; i+=4)
684			{
685				vec[i]   += add1((*p0++), (*p1++));
686				vec[i+1] += add1((*p0++), (*p1++));
687				vec[i+2] += add1((*p0++), (*p1++));
688				vec[i+3] += add1((*p0++), (*p1++));
689			}
690		}
691		/* memorise the best codevector */
692		ps = vo_mult(ps, ps);
693		s = vo_L_msu(vo_L_mult(alpk, ps), psk, alp);
694		if (s > 0)
695		{
696			psk = ps;
697			alpk = alp;
698			for (i = 0; i < nb_pulse; i++)
699			{
700				codvec[i] = ind[i];
701			}
702			for (i = 0; i < L_SUBFR; i++)
703			{
704				y[i] = vec[i];
705			}
706		}
707	}
708	/*-------------------------------------------------------------------*
709	 * Build the codeword, the filtered codeword and index of codevector.*
710	 *-------------------------------------------------------------------*/
711	for (i = 0; i < NPMAXPT * NB_TRACK; i++)
712	{
713		ind[i] = -1;
714	}
715	for (i = 0; i < L_SUBFR; i++)
716	{
717		code[i] = 0;
718		y[i] = vo_shr_r(y[i], 3);               /* Q12 to Q9 */
719	}
720	val = (512 >> h_shift);               /* codeword in Q9 format */
721	for (k = 0; k < nb_pulse; k++)
722	{
723		i = codvec[k];                       /* read pulse position */
724		j = sign[i];                         /* read sign           */
725		index = i >> 2;                 /* index = pos of pulse (0..15) */
726		track = (Word16) (i & 0x03);         /* track = i % NB_TRACK (0..3)  */
727
728		if (j > 0)
729		{
730			code[i] += val;
731			codvec[k] += 128;
732		} else
733		{
734			code[i] -= val;
735			index += NB_POS;
736		}
737
738		i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));
739
740		while (ind[i] >= 0)
741		{
742			i += 1;
743		}
744		ind[i] = index;
745	}
746
747	k = 0;
748	/* Build index of codevector */
749	if(nbbits == 20)
750	{
751		for (track = 0; track < NB_TRACK; track++)
752		{
753			_index[track] = (Word16)(quant_1p_N1(ind[k], 4));
754			k += NPMAXPT;
755		}
756	} else if(nbbits == 36)
757	{
758		for (track = 0; track < NB_TRACK; track++)
759		{
760			_index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
761			k += NPMAXPT;
762		}
763	} else if(nbbits == 44)
764	{
765		for (track = 0; track < NB_TRACK - 2; track++)
766		{
767			_index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
768			k += NPMAXPT;
769		}
770		for (track = 2; track < NB_TRACK; track++)
771		{
772			_index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
773			k += NPMAXPT;
774		}
775	} else if(nbbits == 52)
776	{
777		for (track = 0; track < NB_TRACK; track++)
778		{
779			_index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
780			k += NPMAXPT;
781		}
782	} else if(nbbits == 64)
783	{
784		for (track = 0; track < NB_TRACK; track++)
785		{
786			L_index = quant_4p_4N(&ind[k], 4);
787			_index[track] = (Word16)((L_index >> 14) & 3);
788			_index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
789			k += NPMAXPT;
790		}
791	} else if(nbbits == 72)
792	{
793		for (track = 0; track < NB_TRACK - 2; track++)
794		{
795			L_index = quant_5p_5N(&ind[k], 4);
796			_index[track] = (Word16)((L_index >> 10) & 0x03FF);
797			_index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
798			k += NPMAXPT;
799		}
800		for (track = 2; track < NB_TRACK; track++)
801		{
802			L_index = quant_4p_4N(&ind[k], 4);
803			_index[track] = (Word16)((L_index >> 14) & 3);
804			_index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
805			k += NPMAXPT;
806		}
807	} else if(nbbits == 88)
808	{
809		for (track = 0; track < NB_TRACK; track++)
810		{
811			L_index = quant_6p_6N_2(&ind[k], 4);
812			_index[track] = (Word16)((L_index >> 11) & 0x07FF);
813			_index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
814			k += NPMAXPT;
815		}
816	}
817	return;
818}
819
820
821/*-------------------------------------------------------------------*
822 * Function  cor_h_vec()                                             *
823 * ~~~~~~~~~~~~~~~~~~~~~                                             *
824 * Compute correlations of h[] with vec[] for the specified track.   *
825 *-------------------------------------------------------------------*/
826void cor_h_vec_30(
827		Word16 h[],                           /* (i) scaled impulse response                 */
828		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
829		Word16 track,                         /* (i) track to use                            */
830		Word16 sign[],                        /* (i) sign vector                             */
831		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
832		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
833		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
834		)
835{
836	Word32 i, j, pos, corr;
837	Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
838	Word32 L_sum1,L_sum2;
839	cor_x = cor_1;
840	cor_y = cor_2;
841	p0 = rrixix[track];
842	p3 = rrixix[0];
843	pos = track;
844
845	for (i = 0; i < NB_POS; i+=2)
846	{
847		L_sum1 = L_sum2 = 0L;
848		p1 = h;
849		p2 = &vec[pos];
850		for (j=pos;j < L_SUBFR; j++)
851		{
852			L_sum1 += *p1 * *p2;
853			p2-=3;
854			L_sum2 += *p1++ * *p2;
855			p2+=4;
856		}
857		p2-=3;
858		L_sum2 += *p1++ * *p2++;
859		L_sum2 += *p1++ * *p2++;
860		L_sum2 += *p1++ * *p2++;
861
862		L_sum1 = (L_sum1 << 2);
863		L_sum2 = (L_sum2 << 2);
864
865		corr = vo_round(L_sum1);
866		*cor_x++ = vo_mult(corr, sign[pos]) + (*p0++);
867		corr = vo_round(L_sum2);
868		*cor_y++ = vo_mult(corr, sign[pos-3]) + (*p3++);
869		pos += STEP;
870
871		L_sum1 = L_sum2 = 0L;
872		p1 = h;
873		p2 = &vec[pos];
874		for (j=pos;j < L_SUBFR; j++)
875		{
876			L_sum1 += *p1 * *p2;
877			p2-=3;
878			L_sum2 += *p1++ * *p2;
879			p2+=4;
880		}
881		p2-=3;
882		L_sum2 += *p1++ * *p2++;
883		L_sum2 += *p1++ * *p2++;
884		L_sum2 += *p1++ * *p2++;
885
886		L_sum1 = (L_sum1 << 2);
887		L_sum2 = (L_sum2 << 2);
888
889		corr = vo_round(L_sum1);
890		*cor_x++ = vo_mult(corr, sign[pos]) + (*p0++);
891		corr = vo_round(L_sum2);
892		*cor_y++ = vo_mult(corr, sign[pos-3]) + (*p3++);
893		pos += STEP;
894	}
895	return;
896}
897
898void cor_h_vec_012(
899		Word16 h[],                           /* (i) scaled impulse response                 */
900		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
901		Word16 track,                         /* (i) track to use                            */
902		Word16 sign[],                        /* (i) sign vector                             */
903		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
904		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
905		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
906		)
907{
908	Word32 i, j, pos, corr;
909	Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
910	Word32 L_sum1,L_sum2;
911	cor_x = cor_1;
912	cor_y = cor_2;
913	p0 = rrixix[track];
914	p3 = rrixix[track+1];
915	pos = track;
916
917	for (i = 0; i < NB_POS; i+=2)
918	{
919		L_sum1 = L_sum2 = 0L;
920		p1 = h;
921		p2 = &vec[pos];
922		for (j=62-pos ;j >= 0; j--)
923		{
924			L_sum1 += *p1 * *p2++;
925			L_sum2 += *p1++ * *p2;
926		}
927		L_sum1 += *p1 * *p2;
928		L_sum1 = (L_sum1 << 2);
929		L_sum2 = (L_sum2 << 2);
930
931		corr = (L_sum1 + 0x8000) >> 16;
932		cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++);
933		corr = (L_sum2 + 0x8000) >> 16;
934		cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++);
935		pos += STEP;
936
937		L_sum1 = L_sum2 = 0L;
938		p1 = h;
939		p2 = &vec[pos];
940		for (j= 62-pos;j >= 0; j--)
941		{
942			L_sum1 += *p1 * *p2++;
943			L_sum2 += *p1++ * *p2;
944		}
945		L_sum1 += *p1 * *p2;
946		L_sum1 = (L_sum1 << 2);
947		L_sum2 = (L_sum2 << 2);
948
949		corr = (L_sum1 + 0x8000) >> 16;
950		cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++);
951		corr = (L_sum2 + 0x8000) >> 16;
952		cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++);
953		pos += STEP;
954	}
955	return;
956}
957
958/*-------------------------------------------------------------------*
959 * Function  search_ixiy()                                           *
960 * ~~~~~~~~~~~~~~~~~~~~~~~                                           *
961 * Find the best positions of 2 pulses in a subframe.                *
962 *-------------------------------------------------------------------*/
963
964void search_ixiy(
965		Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
966		Word16 track_x,                       /* (i) track of pulse 1                   */
967		Word16 track_y,                       /* (i) track of pulse 2                   */
968		Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
969		Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
970		Word16 * ix,                          /* (o) position of pulse 1                */
971		Word16 * iy,                          /* (o) position of pulse 2                */
972		Word16 dn[],                          /* (i) corr. between target and h[]       */
973		Word16 dn2[],                         /* (i) vector of selected positions       */
974		Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
975		Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
976		Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
977		)
978{
979	Word32 x, y, pos, thres_ix;
980	Word16 ps1, ps2, sq, sqk;
981	Word16 alp_16, alpk;
982	Word16 *p0, *p1, *p2;
983	Word32 s, alp0, alp1, alp2;
984
985	p0 = cor_x;
986	p1 = cor_y;
987	p2 = rrixiy[track_x];
988
989	thres_ix = nb_pos_ix - NB_MAX;
990
991	alp0 = L_deposit_h(*alp);
992	alp0 = (alp0 + 0x00008000L);       /* for rounding */
993
994	sqk = -1;
995	alpk = 1;
996
997	for (x = track_x; x < L_SUBFR; x += STEP)
998	{
999		ps1 = *ps + dn[x];
1000		alp1 = alp0 + ((*p0++)<<13);
1001
1002		if (dn2[x] < thres_ix)
1003		{
1004			pos = -1;
1005			for (y = track_y; y < L_SUBFR; y += STEP)
1006			{
1007				ps2 = add1(ps1, dn[y]);
1008
1009				alp2 = alp1 + ((*p1++)<<13);
1010				alp2 = alp2 + ((*p2++)<<14);
1011				alp_16 = extract_h(alp2);
1012				sq = vo_mult(ps2, ps2);
1013				s = vo_L_mult(alpk, sq) - ((sqk * alp_16)<<1);
1014
1015				if (s > 0)
1016				{
1017					sqk = sq;
1018					alpk = alp_16;
1019					pos = y;
1020				}
1021			}
1022			p1 -= NB_POS;
1023
1024			if (pos >= 0)
1025			{
1026				*ix = x;
1027				*iy = pos;
1028			}
1029		} else
1030		{
1031			p2 += NB_POS;
1032		}
1033	}
1034
1035	*ps = add1(*ps, add1(dn[*ix], dn[*iy]));
1036	*alp = alpk;
1037
1038	return;
1039}
1040
1041
1042
1043
1044