cor_h.cpp revision b841f14f8e51f2365945281fbfa54ef6a1b1b5a6
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18/****************************************************************************************
19Portions of this file are derived from the following 3GPP standard:
20
21    3GPP TS 26.073
22    ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec
23    Available from http://www.3gpp.org
24
25(C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC)
26Permission to distribute, modify and use this file under the standard license
27terms listed above has been obtained from the copyright holder.
28****************************************************************************************/
29/*
30------------------------------------------------------------------------------
31
32
33
34 Pathname: ./audio/gsm-amr/c/src/cor_h.c
35
36     Date: 06/12/2000
37
38------------------------------------------------------------------------------
39 REVISION HISTORY
40
41 Description: Updated template used to PV coding template. First attempt at
42          optimizing C code.
43
44 Description: Used MAX_16 and MIN_16 when checking the result of Inv_sqrt.
45          Synced up to the new template.
46
47 Description: Added setting of Overflow flag in inlined code.
48
49 Description: Took out cor_h_x function and put it in its own file. Sync'ed
50          up with the single_func_template.c template. Delete version
51          ID variable.
52
53 Description: Synchronized file with UMTS version 3.2.0. Updated coding
54              template. Removed unnecessary include files.
55
56 Description: Fixed portion of the code that builds the rr[] matrix. There
57              was an error in the original inlining of code that caused
58              the code to be not bit-exact with UMTS version 3.2.0.
59
60 Description: Added calls to L_add() and mult() in the code to handle overflow
61              scenario. Moved cor_h.h after cnst.h in the Include section.
62              Doing this allows the unit test to build using the cnst.h in the
63              /test/include directory. Fixed initialization of the accumulator
64              in the first calculation of the sum of squares.
65
66 Description: Made the following changes per comments from Phase 2/3 review:
67              1. Used #define value instead of hard-coded numbers in the code.
68              2. Fixed typecasting issue with TI C compiler.
69              3. Removed typecasting of 0x00008000L in the call to L_add.
70
71 Description: Changed pOverflow from a global variable into a function
72 parameter.
73
74 Description:
75            1. Added pointer to avoid adding offsets in every pass
76            2. Eliminate variables defined as registers
77            3. Removed extra check for overflow by doing scaling right
78               after overflow is detected.
79            4. Eliminated calls to basic operations (like extract) not
80               needed because of the nature of the number (all bounded)
81            5. Eliminated duplicate loop accessing same data
82            6. Simplified matrix addressing by use of pointers
83
84 Description:
85              1. Eliminated unused include files.
86              2. Access twice the number of points when dealing with matrices
87                 and in the process only 3 pointers (instead of 4) are needed
88              3. Replaced array addressing (array sign[]) by pointers
89
90 Description: Changed round function name to pv_round to avoid conflict with
91              round function in C standard library.
92
93 Description: Using inlines from fxp_arithmetic.h .
94
95 Description: Replacing fxp_arithmetic.h with basic_op.h.
96
97 Description:
98
99------------------------------------------------------------------------------
100*/
101
102/*----------------------------------------------------------------------------
103; INCLUDES
104----------------------------------------------------------------------------*/
105#include "cnst.h"
106#include "cor_h.h"
107#include "basicop_malloc.h"
108#include "inv_sqrt.h"
109#include "basic_op.h"
110
111/*----------------------------------------------------------------------------
112; MACROS
113; Define module specific macros here
114----------------------------------------------------------------------------*/
115
116/*----------------------------------------------------------------------------
117; DEFINES
118; Include all pre-processor statements here. Include conditional
119; compile variables also.
120----------------------------------------------------------------------------*/
121
122/*----------------------------------------------------------------------------
123; LOCAL FUNCTION DEFINITIONS
124; Function Prototype declaration
125----------------------------------------------------------------------------*/
126
127/*----------------------------------------------------------------------------
128; LOCAL STORE/BUFFER/POINTER DEFINITIONS
129; Variable declaration - defined here and used outside this module
130----------------------------------------------------------------------------*/
131
132/*
133------------------------------------------------------------------------------
134 FUNCTION NAME: cor_h
135------------------------------------------------------------------------------
136 INPUT AND OUTPUT DEFINITIONS
137
138 Inputs:
139    h = vector containing the impulse response of the weighted synthesis
140        filter; vector contents are of type Word16; vector length is
141        2 * L_SUBFR
142    sign = vector containing the sign information for the correlation
143           values; vector contents are of type Word16; vector length is
144           L_CODE
145    rr = autocorrelation matrix; matrix contents are of type Word16;
146         matrix dimension is L_CODE by L_CODE
147
148 Outputs:
149    rr contents are the newly calculated autocorrelation values
150
151 Returns:
152    None
153
154 Global Variables Used:
155    None
156
157 Local Variables Needed:
158    None
159
160------------------------------------------------------------------------------
161 FUNCTION DESCRIPTION
162
163 This function computes correlations of the impulse response (h) needed for
164 the codebook search, and includes the sign information into the correlations.
165
166 The correlations are given by:
167    rr[i][j] = sum_{n=i}^{L-1} h[n-i] h[n-j];   i>=j; i,j=0,...,L-1
168
169 The sign information is included by:
170    rr[i][j] = rr[i][j]*sign[i]*sign[j]
171
172------------------------------------------------------------------------------
173 REQUIREMENTS
174
175 None
176
177------------------------------------------------------------------------------
178 REFERENCES
179
180 cor_h.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
181
182------------------------------------------------------------------------------
183 PSEUDO-CODE
184
185void cor_h (
186    Word16 h[],         // (i) : impulse response of weighted synthesis
187                                 filter
188    Word16 sign[],      // (i) : sign of d[n]
189    Word16 rr[][L_CODE] // (o) : matrix of autocorrelation
190)
191{
192    Word16 i, j, k, dec, h2[L_CODE];
193    Word32 s;
194
195    // Scaling for maximum precision
196
197    s = 2;
198    for (i = 0; i < L_CODE; i++)
199        s = L_mac (s, h[i], h[i]);
200
201    j = sub (extract_h (s), 32767);
202    if (j == 0)
203    {
204        for (i = 0; i < L_CODE; i++)
205        {
206            h2[i] = shr (h[i], 1);
207        }
208    }
209    else
210    {
211        s = L_shr (s, 1);
212        k = extract_h (L_shl (Inv_sqrt (s), 7));
213        k = mult (k, 32440);                     // k = 0.99*k
214
215        for (i = 0; i < L_CODE; i++)
216        {
217            h2[i] = pv_round (L_shl (L_mult (h[i], k), 9));
218        }
219    }
220
221    // build matrix rr[]
222    s = 0;
223    i = L_CODE - 1;
224    for (k = 0; k < L_CODE; k++, i--)
225    {
226        s = L_mac (s, h2[k], h2[k]);
227        rr[i][i] = pv_round (s);
228    }
229
230    for (dec = 1; dec < L_CODE; dec++)
231    {
232        s = 0;
233        j = L_CODE - 1;
234        i = sub (j, dec);
235        for (k = 0; k < (L_CODE - dec); k++, i--, j--)
236        {
237            s = L_mac (s, h2[k], h2[k + dec]);
238            rr[j][i] = mult (pv_round (s), mult (sign[i], sign[j]));
239            rr[i][j] = rr[j][i];
240        }
241    }
242}
243
244---------------------------------------------------------------------------
245 RESOURCES USED [optional]
246
247 When the code is written for a specific target processor,
248 the resources used should be documented below.
249
250 HEAP MEMORY USED: x bytes
251
252 STACK MEMORY USED: x bytes
253
254 CLOCK CYCLES: (cycle count equation for this function) + (variable
255                used to represent cycle count for each subroutine
256                called)
257     where: (cycle count variable) = cycle count for [subroutine
258                                     name]
259
260------------------------------------------------------------------------------
261 CAUTION [optional]
262 [State any special notes, constraints or cautions for users of this function]
263
264------------------------------------------------------------------------------
265*/
266
267void cor_h(
268    Word16 h[],          /* (i) : impulse response of weighted synthesis
269                                  filter                                  */
270    Word16 sign[],       /* (i) : sign of d[n]                            */
271    Word16 rr[][L_CODE], /* (o) : matrix of autocorrelation               */
272    Flag  *pOverflow
273)
274{
275    register Word16 i;
276    register Word16 dec;
277
278    Word16 h2[L_CODE];
279    Word32 s;
280    Word32 s2;
281    Word16 tmp1;
282    Word16 tmp2;
283    Word16 tmp11;
284    Word16 tmp22;
285
286    Word16 *p_h;
287    Word16 *p_h2;
288    Word16 *rr1;
289    Word16 *rr2;
290    Word16 *rr3;
291    Word16 *p_rr_ref1;
292    Word16 *p_sign1;
293    Word16 *p_sign2;
294
295    /* Scaling for maximum precision */
296
297    /* Initialize accumulator to 1 since left shift happens    */
298    /* after the accumulation of the sum of squares (original  */
299    /* code initialized s to 2)                                */
300    s = 1;
301    p_h = h;
302
303    for (i = (L_CODE >> 1); i != 0 ; i--)
304    {
305        tmp1 = *(p_h++);
306        s = amrnb_fxp_mac_16_by_16bb((Word32) tmp1, (Word32) tmp1, s);
307        tmp1 = *(p_h++);
308        s = amrnb_fxp_mac_16_by_16bb((Word32) tmp1, (Word32) tmp1, s);
309
310    }
311
312    s <<= 1;
313
314    if (s & MIN_32)
315    {
316        p_h2 = h2;
317        p_h  = h;
318
319        for (i = (L_CODE >> 1); i != 0; i--)
320        {
321            *(p_h2++) =  *(p_h++)  >> 1;
322            *(p_h2++) =  *(p_h++)  >> 1;
323        }
324    }
325    else
326    {
327
328        s >>= 1;
329
330        s = Inv_sqrt(s, pOverflow);
331
332        if (s < (Word32) 0x00ffffffL)
333        {
334            /* k = 0.99*k */
335            dec = (Word16)(((s >> 9) * 32440) >> 15);
336        }
337        else
338        {
339            dec = 32440;  /* 0.99 */
340        }
341
342        p_h  = h;
343        p_h2 = h2;
344
345        for (i = (L_CODE >> 1); i != 0; i--)
346        {
347            *(p_h2++) = (Word16)((amrnb_fxp_mac_16_by_16bb((Word32) * (p_h++), (Word32) dec, 0x020L)) >> 6);
348            *(p_h2++) = (Word16)((amrnb_fxp_mac_16_by_16bb((Word32) * (p_h++), (Word32) dec, 0x020L)) >> 6);
349        }
350    }
351    /* build matrix rr[] */
352
353    s = 0;
354
355    p_h2 = h2;
356
357    rr1 = &rr[L_CODE-1][L_CODE-1];
358
359    for (i = L_CODE >> 1; i != 0 ; i--)
360    {
361        tmp1   = *(p_h2++);
362        s = amrnb_fxp_mac_16_by_16bb((Word32) tmp1, (Word32) tmp1, s);
363        *rr1 = (Word16)((s + 0x00004000L) >> 15);
364        rr1 -= (L_CODE + 1);
365        tmp1   = *(p_h2++);
366        s = amrnb_fxp_mac_16_by_16bb((Word32) tmp1, (Word32) tmp1, s);
367        *rr1 = (Word16)((s + 0x00004000L) >> 15);
368        rr1 -= (L_CODE + 1);
369    }
370
371
372    p_rr_ref1 = rr[L_CODE-1];
373
374    for (dec = 1; dec < L_CODE; dec += 2)
375    {
376        rr1 = &p_rr_ref1[L_CODE-1-dec];
377
378        rr2 = &rr[L_CODE-1-dec][L_CODE-1];
379        rr3 = &rr[L_CODE-1-(dec+1)][L_CODE-1];
380
381        s  = 0;
382        s2 = 0;
383
384        p_sign1 = &sign[L_CODE - 1];
385        p_sign2 = &sign[L_CODE - 1 - dec];
386
387        p_h2 = h2;
388        p_h  = &h2[dec];
389
390        for (i = (L_CODE - dec - 1); i != 0 ; i--)
391        {
392            s = amrnb_fxp_mac_16_by_16bb((Word32) * (p_h2), (Word32) * (p_h++), s);
393            s2 = amrnb_fxp_mac_16_by_16bb((Word32) * (p_h2++), (Word32) * (p_h), s2);
394
395            tmp1  = (Word16)((s + 0x00004000L) >> 15);
396            tmp11 = (Word16)((s2 + 0x00004000L) >> 15);
397
398            tmp2  = ((Word32) * (p_sign1) * *(p_sign2--)) >> 15;
399            tmp22 = ((Word32) * (p_sign1--) * *(p_sign2)) >> 15;
400
401            *rr2 = ((Word32) tmp1 * tmp2) >> 15;
402            *(rr1--) = *rr2;
403            *rr1 = ((Word32) tmp11 * tmp22) >> 15;
404            *rr3 = *rr1;
405
406            rr1 -= (L_CODE);
407            rr2 -= (L_CODE + 1);
408            rr3 -= (L_CODE + 1);
409
410        }
411
412        s = amrnb_fxp_mac_16_by_16bb((Word32) * (p_h2), (Word32) * (p_h), s);
413
414        tmp1 = (Word16)((s + 0x00004000L) >> 15);
415
416        tmp2 = ((Word32) * (p_sign1) * *(p_sign2)) >> 15;
417        *rr1 = ((Word32) tmp1 * tmp2) >> 15;
418
419        *rr2 = *rr1;
420
421        rr1 -= (L_CODE + 1);
422        rr2 -= (L_CODE + 1);
423
424    }
425
426    return;
427
428}
429
430