1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18/*
19------------------------------------------------------------------------------
20
21   PacketVideo Corp.
22   MP3 Decoder Library
23
24   Filename: pvmp3_dct_16.cpp
25
26   Functions:
    pvmp3_dct_16
    pvmp3_merge_in_place_N32
    pvmp3_split
30
31     Date: 09/21/2007
32
33------------------------------------------------------------------------------
34 REVISION HISTORY
35
36
37 Description:
38
39------------------------------------------------------------------------------
40 INPUT AND OUTPUT DEFINITIONS
41
    pvmp3_dct_16

Input
    int32 vec[],        input vector length 16
    int32 flag          processing direction: forward (1), backward (0)
47 Returns
48
49    int32 vec[],        dct length 16
50
51------------------------------------------------------------------------------
52 INPUT AND OUTPUT DEFINITIONS
53
    pvmp3_merge_in_place_N32

Input
    int32 vec[],        input vector length 32 (two length-16 halves)
58
59 Returns
60
61    int32 vec[],        merged  output of two dct 16 to create a dct 32
62
63------------------------------------------------------------------------------
64 INPUT AND OUTPUT DEFINITIONS
65
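    pvmp3_split

Input
    int32 *vect,        pointer to the midpoint of the vector; the 16 elements
                        below it and the 16 elements from it upward are used

 Returns

    int32 *vect,        sums stored below vect, cosine-weighted differences
                        stored from vect upward (even/odd split and
                        pre-processing rotation)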
74
75------------------------------------------------------------------------------
76 FUNCTION DESCRIPTION
77
78    dct 16 and tools to assemble a dct32 output
79
80------------------------------------------------------------------------------
81 REQUIREMENTS
82
83
84------------------------------------------------------------------------------
85 REFERENCES
86
87------------------------------------------------------------------------------
88 PSEUDO-CODE
89
90------------------------------------------------------------------------------
91*/
92
93#if ( !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4) )
94/*----------------------------------------------------------------------------
95; INCLUDES
96----------------------------------------------------------------------------*/
97
98#include "pvmp3_dct_16.h"
99#include "pv_mp3dec_fxd_op.h"
100
101/*----------------------------------------------------------------------------
102; MACROS
103; Define module specific macros here
104----------------------------------------------------------------------------*/
105
106
107/*----------------------------------------------------------------------------
108; DEFINES
109; Include all pre-processor statements here. Include conditional
110; compile variables also.
111----------------------------------------------------------------------------*/
/* Convert a real-valued constant to 32-bit fixed point scaled by (1 << 27).
 * The whole replacement list is parenthesized so the macro behaves as a
 * single primary expression wherever it is expanded (e.g. under sizeof). */
#define Qfmt(a)   ((int32)((a)*((int32)1<<27)))
113
114/*----------------------------------------------------------------------------
115; LOCAL FUNCTION DEFINITIONS
116; Function Prototype declaration
117----------------------------------------------------------------------------*/
118
119/*----------------------------------------------------------------------------
120; LOCAL STORE/BUFFER/POINTER DEFINITIONS
121; Variable declaration - defined here and used outside this module
122----------------------------------------------------------------------------*/
123const int32 CosTable_dct32[16] =
124{
125    Qfmt_31(0.50060299823520F) ,  Qfmt_31(0.50547095989754F) ,
126    Qfmt_31(0.51544730992262F) ,  Qfmt_31(0.53104259108978F) ,
127    Qfmt_31(0.55310389603444F) ,  Qfmt_31(0.58293496820613F) ,
128    Qfmt_31(0.62250412303566F) ,  Qfmt_31(0.67480834145501F) ,
129    Qfmt_31(0.74453627100230F) ,  Qfmt_31(0.83934964541553F) ,
130
131    Qfmt(0.97256823786196F) ,  Qfmt(1.16943993343288F) ,
132    Qfmt(1.48416461631417F) ,  Qfmt(2.05778100995341F) ,
133    Qfmt(3.40760841846872F) ,  Qfmt(10.19000812354803F)
134};
135
136
137/*----------------------------------------------------------------------------
138; EXTERNAL FUNCTION REFERENCES
139; Declare functions defined elsewhere and referenced in this module
140----------------------------------------------------------------------------*/
141
142/*----------------------------------------------------------------------------
143; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
144; Declare variables used in this module but defined elsewhere
145----------------------------------------------------------------------------*/
146
147
148/*----------------------------------------------------------------------------
149; FUNCTION CODE
150----------------------------------------------------------------------------*/
151
152__attribute__((no_sanitize("integer")))
153void pvmp3_dct_16(int32 vec[], int32 flag)
154{
155    int32 tmp0;
156    int32 tmp1;
157    int32 tmp2;
158    int32 tmp3;
159    int32 tmp4;
160    int32 tmp5;
161    int32 tmp6;
162    int32 tmp7;
163    int32 tmp_o0;
164    int32 tmp_o1;
165    int32 tmp_o2;
166    int32 tmp_o3;
167    int32 tmp_o4;
168    int32 tmp_o5;
169    int32 tmp_o6;
170    int32 tmp_o7;
171    int32 itmp_e0;
172    int32 itmp_e1;
173    int32 itmp_e2;
174
175    /*  split input vector */
176
177    tmp_o0 = fxp_mul32_Q32((vec[ 0] - vec[15]), Qfmt_31(0.50241928618816F));
178    tmp0   =  vec[ 0] + vec[15];
179
180    tmp_o7 = fxp_mul32_Q32((vec[ 7] - vec[ 8]) << 3, Qfmt_31(0.63764357733614F));
181    tmp7   =  vec[ 7] + vec[ 8];
182
183    itmp_e0    = fxp_mul32_Q32((tmp0 - tmp7), Qfmt_31(0.50979557910416F));
184    tmp7 = (tmp0 + tmp7);
185
186    tmp_o1 = fxp_mul32_Q32((vec[ 1] - vec[14]), Qfmt_31(0.52249861493969F));
187    tmp1   =  vec[ 1] + vec[14];
188
189    tmp_o6 = fxp_mul32_Q32((vec[ 6] - vec[ 9]) << 1, Qfmt_31(0.86122354911916F));
190    tmp6   =  vec[ 6] + vec[ 9];
191
192
193
194    itmp_e1 = (tmp1 + tmp6);
195    tmp6    = fxp_mul32_Q32((tmp1 - tmp6), Qfmt_31(0.60134488693505F));
196
197
198
199    tmp_o2 = fxp_mul32_Q32((vec[ 2] - vec[13]), Qfmt_31(0.56694403481636F));
200    tmp2   =  vec[ 2] + vec[13];
201    tmp_o5 = fxp_mul32_Q32((vec[ 5] - vec[10]) << 1, Qfmt_31(0.53033884299517F));
202    tmp5   =  vec[ 5] + vec[10];
203
204    itmp_e2 = (tmp2 + tmp5);
205    tmp5    = fxp_mul32_Q32((tmp2 - tmp5), Qfmt_31(0.89997622313642F));
206
207    tmp_o3 = fxp_mul32_Q32((vec[ 3] - vec[12]), Qfmt_31(0.64682178335999F));
208    tmp3   =  vec[ 3] + vec[12];
209    tmp_o4 = fxp_mul32_Q32((vec[ 4] - vec[11]), Qfmt_31(0.78815462345125F));
210    tmp4   =  vec[ 4] + vec[11];
211
212    tmp1   = (tmp3 + tmp4);
213    tmp4   =  fxp_mul32_Q32((tmp3 - tmp4) << 2, Qfmt_31(0.64072886193538F));
214
215    /*  split even part of tmp_e */
216
217    tmp0 = (tmp7 + tmp1);
218    tmp1 = fxp_mul32_Q32((tmp7 - tmp1), Qfmt_31(0.54119610014620F));
219
220    tmp3 = fxp_mul32_Q32((itmp_e1 - itmp_e2) << 1, Qfmt_31(0.65328148243819F));
221    tmp7 = (itmp_e1 + itmp_e2);
222
223    vec[ 0]  = (tmp0 + tmp7) >> 1;
224    vec[ 8]  = fxp_mul32_Q32((tmp0 - tmp7), Qfmt_31(0.70710678118655F));
225    tmp0     = fxp_mul32_Q32((tmp1 - tmp3) << 1, Qfmt_31(0.70710678118655F));
226    vec[ 4]  =  tmp1 + tmp3 + tmp0;
227    vec[12]  =  tmp0;
228
229    /*  split odd part of tmp_e */
230
231    tmp1 = fxp_mul32_Q32((itmp_e0 - tmp4) << 1, Qfmt_31(0.54119610014620F));
232    tmp7 = itmp_e0 + tmp4;
233
234    tmp3  = fxp_mul32_Q32((tmp6 - tmp5) << 2, Qfmt_31(0.65328148243819F));
235    tmp6 += tmp5;
236
237    tmp4  = fxp_mul32_Q32((tmp7 - tmp6) << 1, Qfmt_31(0.70710678118655F));
238    tmp6 += tmp7;
239    tmp7  = fxp_mul32_Q32((tmp1 - tmp3) << 1, Qfmt_31(0.70710678118655F));
240
241    tmp1    +=  tmp3 + tmp7;
242    vec[ 2]  =  tmp1 + tmp6;
243    vec[ 6]  =  tmp1 + tmp4;
244    vec[10]  =  tmp7 + tmp4;
245    vec[14]  =  tmp7;
246
247
248    // dct8;
249
250    tmp1 = fxp_mul32_Q32((tmp_o0 - tmp_o7) << 1, Qfmt_31(0.50979557910416F));
251    tmp7 = tmp_o0 + tmp_o7;
252
253    tmp6   = tmp_o1 + tmp_o6;
254    tmp_o1 = fxp_mul32_Q32((tmp_o1 - tmp_o6) << 1, Qfmt_31(0.60134488693505F));
255
256    tmp5   = tmp_o2 + tmp_o5;
257    tmp_o5 = fxp_mul32_Q32((tmp_o2 - tmp_o5) << 1, Qfmt_31(0.89997622313642F));
258
259    tmp0 = fxp_mul32_Q32((tmp_o3 - tmp_o4) << 3, Qfmt_31(0.6407288619354F));
260    tmp4 = tmp_o3 + tmp_o4;
261
262    if (!flag)
263    {
264        tmp7   = -tmp7;
265        tmp1   = -tmp1;
266        tmp6   = -tmp6;
267        tmp_o1 = -tmp_o1;
268        tmp5   = -tmp5;
269        tmp_o5 = -tmp_o5;
270        tmp4   = -tmp4;
271        tmp0   = -tmp0;
272    }
273
274
275    tmp2     =  fxp_mul32_Q32((tmp1 -   tmp0) << 1, Qfmt_31(0.54119610014620F));
276    tmp0    +=  tmp1;
277    tmp1     =  fxp_mul32_Q32((tmp7 -   tmp4) << 1, Qfmt_31(0.54119610014620F));
278    tmp7    +=  tmp4;
279    tmp4     =  fxp_mul32_Q32((tmp6 -   tmp5) << 2, Qfmt_31(0.65328148243819F));
280    tmp6    +=  tmp5;
281    tmp5     =  fxp_mul32_Q32((tmp_o1 - tmp_o5) << 2, Qfmt_31(0.65328148243819F));
282    tmp_o1  += tmp_o5;
283
284
285    vec[13]  =  fxp_mul32_Q32((tmp1 -   tmp4) << 1, Qfmt_31(0.70710678118655F));
286    vec[ 5]  =  tmp1 + tmp4 + vec[13];
287
288    vec[ 9]  =  fxp_mul32_Q32((tmp7 -   tmp6) << 1, Qfmt_31(0.70710678118655F));
289    vec[ 1]  =  tmp7 + tmp6;
290
291    tmp4     =  fxp_mul32_Q32((tmp0 - tmp_o1) << 1, Qfmt_31(0.70710678118655F));
292    tmp0    +=  tmp_o1;
293    tmp6     =  fxp_mul32_Q32((tmp2 -   tmp5) << 1, Qfmt_31(0.70710678118655F));
294    tmp2    +=  tmp5 + tmp6;
295    tmp0    +=  tmp2;
296
297    vec[ 1] += tmp0;
298    vec[ 3]  = tmp0 + vec[ 5];
299    tmp2    += tmp4;
300    vec[ 5]  = tmp2 + vec[ 5];
301    vec[ 7]  = tmp2 + vec[ 9];
302    tmp4    += tmp6;
303    vec[ 9]  = tmp4 + vec[ 9];
304    vec[11]  = tmp4 + vec[13];
305    vec[13]  = tmp6 + vec[13];
306    vec[15]  = tmp6;
307
308}
309/*----------------------------------------------------------------------------
310; FUNCTION CODE
311----------------------------------------------------------------------------*/
312__attribute__((no_sanitize("integer")))
313void pvmp3_merge_in_place_N32(int32 vec[])
314{
315
316
317    int32 temp0;
318    int32 temp1;
319    int32 temp2;
320    int32 temp3;
321
322    temp0   = vec[14];
323    vec[14] = vec[ 7];
324    temp1   = vec[12];
325    vec[12] = vec[ 6];
326    temp2   = vec[10];
327    vec[10] = vec[ 5];
328    temp3   = vec[ 8];
329    vec[ 8] = vec[ 4];
330    vec[ 6] = vec[ 3];
331    vec[ 4] = vec[ 2];
332    vec[ 2] = vec[ 1];
333
334    vec[ 1] = (vec[16] + vec[17]);
335    vec[16] = temp3;
336    vec[ 3] = (vec[18] + vec[17]);
337    vec[ 5] = (vec[19] + vec[18]);
338    vec[18] = vec[9];
339
340    vec[ 7] = (vec[20] + vec[19]);
341    vec[ 9] = (vec[21] + vec[20]);
342    vec[20] = temp2;
343    temp2   = vec[13];
344    temp3   = vec[11];
345    vec[11] = (vec[22] + vec[21]);
346    vec[13] = (vec[23] + vec[22]);
347    vec[22] = temp3;
348    temp3   = vec[15];
349
350    vec[15] = (vec[24] + vec[23]);
351    vec[17] = (vec[25] + vec[24]);
352    vec[19] = (vec[26] + vec[25]);
353    vec[21] = (vec[27] + vec[26]);
354    vec[23] = (vec[28] + vec[27]);
355    vec[24] = temp1;
356    vec[25] = (vec[29] + vec[28]);
357    vec[26] = temp2;
358    vec[27] = (vec[30] + vec[29]);
359    vec[28] = temp0;
360    vec[29] = (vec[30] + vec[31]);
361    vec[30] = temp3;
362}
363
364
365/*----------------------------------------------------------------------------
366; FUNCTION CODE
367----------------------------------------------------------------------------*/
368
369
370
371__attribute__((no_sanitize("integer")))
372void pvmp3_split(int32 *vect)
373{
374
375    int32 i;
376    const int32 *pt_cosTerms = &CosTable_dct32[15];
377    int32 *pt_vect   = vect;
378    int32 *pt_vect_2 = pt_vect - 1;
379
380    for (i = 3; i != 0; i--)
381    {
382        int32 tmp2 = *(pt_vect);
383        int32 tmp1 = *(pt_vect_2);
384        int32 cosx = *(pt_cosTerms--);
385        *(pt_vect_2--) = (tmp1  + tmp2);
386        *(pt_vect++)   = fxp_mul32_Q27((tmp1 - tmp2), cosx);
387
388        tmp2 = *(pt_vect);
389        tmp1 = *(pt_vect_2);
390        cosx = *(pt_cosTerms--);
391        *(pt_vect_2--) = (tmp1  + tmp2);
392        *(pt_vect++)   = fxp_mul32_Q27((tmp1 - tmp2), cosx);
393
394    }
395
396    for (i = 5; i != 0; i--)
397    {
398        int32 tmp2 = *(pt_vect);
399        int32 tmp1 = *(pt_vect_2);
400        int32 cosx = *(pt_cosTerms--);
401        *(pt_vect_2--) = (tmp1  + tmp2);
402        *(pt_vect++) = fxp_mul32_Q32((tmp1 - tmp2) << 1, cosx);
403
404        tmp2 = *(pt_vect);
405        tmp1 = *(pt_vect_2);
406        cosx = *(pt_cosTerms--);
407        *(pt_vect_2--) = (tmp1  + tmp2);
408        *(pt_vect++) = fxp_mul32_Q32((tmp1 - tmp2) << 1, cosx);
409    }
410
411}
412
413#endif
414