1/******************************************************************************
2 *
3 *  Copyright 1999-2012 Broadcom Corporation
4 *
5 *  Licensed under the Apache License, Version 2.0 (the "License");
6 *  you may not use this file except in compliance with the License.
7 *  You may obtain a copy of the License at:
8 *
9 *  http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 *
17 ******************************************************************************/
18
19/******************************************************************************
20 *
21 *  source file for fast dct operations
22 *
23 ******************************************************************************/
24
25#include "sbc_dct.h"
26#include "sbc_enc_func_declare.h"
27#include "sbc_encoder.h"
28
/*******************************************************************************
 *
 * Function         SBC_FastIDCT8
 *
 * Description      implementation of fast DCT algorithm by Feig and Winograd
 *
 *
 * Returns          void; the result y = dct(pInVect) is written to pOutVect
 *
 *
 ******************************************************************************/
40
/*
 * Fixed-point cosine constants and the multiply primitive used by the IDCT
 * routines in this file.
 *
 * SBC_IS_64_MULT_IN_IDCT == FALSE: constants are cos(x) scaled by 0x8000 and
 * SBC_IDCT_MULT maps to SBC_MULT_32_16_SIMPLIFIED.
 * Otherwise: constants are cos(x) scaled by 0x80000000 and SBC_IDCT_MULT maps
 * to SBC_MULT_32_32.
 *
 * As used in this file, SBC_IDCT_MULT(a, b, c) takes the cosine constant in
 * a, the operand in b and the destination lvalue in c.
 */
#if (SBC_IS_64_MULT_IN_IDCT == FALSE)

#define SBC_COS_PI_SUR_4 (0x00005a82)   /* 0x8000 * 0.7071 = cos(pi/4)    */
#define SBC_COS_PI_SUR_8 (0x00007641)   /* 0x8000 * 0.9239 = cos(pi/8)    */
#define SBC_COS_3PI_SUR_8 (0x000030fb)  /* 0x8000 * 0.3827 = cos(3*pi/8)  */
#define SBC_COS_PI_SUR_16 (0x00007d8a)  /* 0x8000 * 0.9808 = cos(pi/16)   */
#define SBC_COS_3PI_SUR_16 (0x00006a6d) /* 0x8000 * 0.8315 = cos(3*pi/16) */
#define SBC_COS_5PI_SUR_16 (0x0000471c) /* 0x8000 * 0.5556 = cos(5*pi/16) */
#define SBC_COS_7PI_SUR_16 (0x000018f8) /* 0x8000 * 0.1951 = cos(7*pi/16) */

#define SBC_IDCT_MULT(a, b, c) SBC_MULT_32_16_SIMPLIFIED(a, b, c)

#else

#define SBC_COS_PI_SUR_4 (0x5A827999)   /* 0x80000000 * 0.707106781 = cos(pi/4)    */
#define SBC_COS_PI_SUR_8 (0x7641AF3C)   /* 0x80000000 * 0.923879533 = cos(pi/8)    */
#define SBC_COS_3PI_SUR_8 (0x30FBC54D)  /* 0x80000000 * 0.382683432 = cos(3*pi/8)  */
#define SBC_COS_PI_SUR_16 (0x7D8A5F3F)  /* 0x80000000 * 0.98078528  = cos(pi/16)   */
#define SBC_COS_3PI_SUR_16 (0x6A6D98A4) /* 0x80000000 * 0.831469612 = cos(3*pi/16) */
#define SBC_COS_5PI_SUR_16 (0x471CECE6) /* 0x80000000 * 0.555570233 = cos(5*pi/16) */
#define SBC_COS_7PI_SUR_16 (0x18F8B83C) /* 0x80000000 * 0.195090322 = cos(7*pi/16) */

#define SBC_IDCT_MULT(a, b, c) SBC_MULT_32_32(a, b, c)

#endif /* SBC_IS_64_MULT_IN_IDCT */
75
/*
 * Coefficient tables for the direct matrixing path, which is compiled only
 * when SBC_FAST_DCT is FALSE. They are declared here and defined elsewhere.
 * SBC_FastIDCT8 reads gas16AnalDCTcoeff8 at indices 0..127 and SBC_FastIDCT4
 * reads gas16AnalDCTcoeff4 at indices 0..31.
 */
#if (SBC_FAST_DCT == FALSE)
extern const int16_t gas16AnalDCTcoeff8[];
extern const int16_t gas16AnalDCTcoeff4[];
#endif
80
81void SBC_FastIDCT8(int32_t* pInVect, int32_t* pOutVect) {
82#if (SBC_FAST_DCT == TRUE)
83#if (SBC_ARM_ASM_OPT == TRUE)
84#else
85#if (SBC_IPAQ_OPT == TRUE)
86#if (SBC_IS_64_MULT_IN_IDCT == TRUE)
87  int64_t s64Temp;
88#endif
89#else
90#if (SBC_IS_64_MULT_IN_IDCT == TRUE)
91  int32_t s32HiTemp;
92#else
93  int32_t s32In2Temp;
94  register int32_t s32In1Temp;
95#endif
96#endif
97#endif
98
99  register int32_t x0, x1, x2, x3, x4, x5, x6, x7, temp;
100  int32_t res_even[4], res_odd[4];
101  /*x0= (pInVect[4])/2 ;*/
102  SBC_IDCT_MULT(SBC_COS_PI_SUR_4, pInVect[4], x0);
103  /*printf("x0 0x%x = %d = %d * %d\n", x0, x0, SBC_COS_PI_SUR_4, pInVect[4]);*/
104
105  x1 = (pInVect[3] + pInVect[5]) >> 1;
106  x2 = (pInVect[2] + pInVect[6]) >> 1;
107  x3 = (pInVect[1] + pInVect[7]) >> 1;
108  x4 = (pInVect[0] + pInVect[8]) >> 1;
109  x5 = (pInVect[9] - pInVect[15]) >> 1;
110  x6 = (pInVect[10] - pInVect[14]) >> 1;
111  x7 = (pInVect[11] - pInVect[13]) >> 1;
112
113  /* 2-point IDCT of x0 and x4 as in (11) */
114  temp = x0;
115  SBC_IDCT_MULT(SBC_COS_PI_SUR_4, (x0 + x4),
116                x0); /*x0 = ( x0 + x4 ) * cos(1*pi/4) ; */
117  SBC_IDCT_MULT(SBC_COS_PI_SUR_4, (temp - x4),
118                x4); /*x4 = ( temp - x4 ) * cos(1*pi/4) ; */
119
120  /* rearrangement of x2 and x6 as in (15) */
121  x2 -= x6;
122  x6 <<= 1;
123
124  /* 2-point IDCT of x2 and x6 and post-multiplication as in (15) */
125  SBC_IDCT_MULT(SBC_COS_PI_SUR_4, x6, x6); /*x6 = x6 * cos(1*pi/4) ; */
126  temp = x2;
127  SBC_IDCT_MULT(SBC_COS_PI_SUR_8, (x2 + x6),
128                x2); /*x2 = ( x2 + x6 ) * cos(1*pi/8) ; */
129  SBC_IDCT_MULT(SBC_COS_3PI_SUR_8, (temp - x6),
130                x6); /*x6 = ( temp - x6 ) * cos(3*pi/8) ;*/
131
132  /* 4-point IDCT of x0,x2,x4 and x6 as in (11) */
133  res_even[0] = x0 + x2;
134  res_even[1] = x4 + x6;
135  res_even[2] = x4 - x6;
136  res_even[3] = x0 - x2;
137
138  /* rearrangement of x1,x3,x5,x7 as in (15) */
139  x7 <<= 1;
140  x5 = (x5 << 1) - x7;
141  x3 = (x3 << 1) - x5;
142  x1 -= x3 >> 1;
143
144  /* two-dimensional IDCT of x1 and x5 */
145  SBC_IDCT_MULT(SBC_COS_PI_SUR_4, x5, x5); /*x5 = x5 * cos(1*pi/4) ; */
146  temp = x1;
147  x1 = x1 + x5;
148  x5 = temp - x5;
149
150  /* rearrangement of x3 and x7 as in (15) */
151  x3 -= x7;
152  x7 <<= 1;
153  SBC_IDCT_MULT(SBC_COS_PI_SUR_4, x7, x7); /*x7 = x7 * cos(1*pi/4) ; */
154
155  /* 2-point IDCT of x3 and x7 and post-multiplication as in (15) */
156  temp = x3;
157  SBC_IDCT_MULT(SBC_COS_PI_SUR_8, (x3 + x7),
158                x3); /*x3 = ( x3 + x7 ) * cos(1*pi/8)  ; */
159  SBC_IDCT_MULT(SBC_COS_3PI_SUR_8, (temp - x7),
160                x7); /*x7 = ( temp - x7 ) * cos(3*pi/8) ;*/
161
162  /* 4-point IDCT of x1,x3,x5 and x7 and post multiplication by diagonal matrix
163   * as in (14) */
164  SBC_IDCT_MULT((SBC_COS_PI_SUR_16), (x1 + x3),
165                res_odd[0]); /*res_odd[ 0 ] = ( x1 + x3 ) * cos(1*pi/16) ; */
166  SBC_IDCT_MULT((SBC_COS_3PI_SUR_16), (x5 + x7),
167                res_odd[1]); /*res_odd[ 1 ] = ( x5 + x7 ) * cos(3*pi/16) ; */
168  SBC_IDCT_MULT((SBC_COS_5PI_SUR_16), (x5 - x7),
169                res_odd[2]); /*res_odd[ 2 ] = ( x5 - x7 ) * cos(5*pi/16) ; */
170  SBC_IDCT_MULT((SBC_COS_7PI_SUR_16), (x1 - x3),
171                res_odd[3]); /*res_odd[ 3 ] = ( x1 - x3 ) * cos(7*pi/16) ; */
172
173  /* additions and subtractions as in (9) */
174  pOutVect[0] = (res_even[0] + res_odd[0]);
175  pOutVect[1] = (res_even[1] + res_odd[1]);
176  pOutVect[2] = (res_even[2] + res_odd[2]);
177  pOutVect[3] = (res_even[3] + res_odd[3]);
178  pOutVect[7] = (res_even[0] - res_odd[0]);
179  pOutVect[6] = (res_even[1] - res_odd[1]);
180  pOutVect[5] = (res_even[2] - res_odd[2]);
181  pOutVect[4] = (res_even[3] - res_odd[3]);
182#else
183  uint8_t Index, k;
184  int32_t temp;
185  /*Calculate 4 subband samples by matrixing*/
186  for (Index = 0; Index < 8; Index++) {
187    temp = 0;
188    for (k = 0; k < 16; k++) {
189      /*temp += (int32_t)(((int64_t)M[(Index*strEncParams->numOfSubBands*2)+k] *
190       * Y[k]) >> 16 );*/
191      temp += (gas16AnalDCTcoeff8[(Index * 8 * 2) + k] * (pInVect[k] >> 16));
192      temp +=
193          ((gas16AnalDCTcoeff8[(Index * 8 * 2) + k] * (pInVect[k] & 0xFFFF)) >>
194           16);
195    }
196    pOutVect[Index] = temp;
197  }
198#endif
199  /*    printf("pOutVect: 0x%x;0x%x;0x%x;0x%x;0x%x;0x%x;0x%x;0x%x\n",\
200          pOutVect[0],pOutVect[1],pOutVect[2],pOutVect[3],pOutVect[4],pOutVect[5],pOutVect[6],pOutVect[7]);*/
201}
202
203/*******************************************************************************
204 *
205 * Function         SBC_FastIDCT4
206 *
207 * Description      implementation of fast DCT algorithm by Feig and Winograd
208 *
209 *
210 * Returns          y = dct(x0)
211 *
212 *
213 ******************************************************************************/
214void SBC_FastIDCT4(int32_t* pInVect, int32_t* pOutVect) {
215#if (SBC_FAST_DCT == TRUE)
216#if (SBC_ARM_ASM_OPT == TRUE)
217#else
218#if (SBC_IPAQ_OPT == TRUE)
219#if (SBC_IS_64_MULT_IN_IDCT == TRUE)
220  int64_t s64Temp;
221#endif
222#else
223#if (SBC_IS_64_MULT_IN_IDCT == TRUE)
224  int32_t s32HiTemp;
225#else
226  uint16_t s32In2Temp;
227  int32_t s32In1Temp;
228#endif
229#endif
230#endif
231  int32_t temp, x2;
232  int32_t tmp[8];
233
234  x2 = pInVect[2] >> 1;
235  temp = (pInVect[0] + pInVect[4]);
236  SBC_IDCT_MULT((SBC_COS_PI_SUR_4 >> 1), temp, tmp[0]);
237  tmp[1] = x2 - tmp[0];
238  tmp[0] += x2;
239  temp = (pInVect[1] + pInVect[3]);
240  SBC_IDCT_MULT((SBC_COS_3PI_SUR_8 >> 1), temp, tmp[3]);
241  SBC_IDCT_MULT((SBC_COS_PI_SUR_8 >> 1), temp, tmp[2]);
242  temp = (pInVect[5] - pInVect[7]);
243  SBC_IDCT_MULT((SBC_COS_3PI_SUR_8 >> 1), temp, tmp[5]);
244  SBC_IDCT_MULT((SBC_COS_PI_SUR_8 >> 1), temp, tmp[4]);
245  tmp[6] = tmp[2] + tmp[5];
246  tmp[7] = tmp[3] - tmp[4];
247  pOutVect[0] = (tmp[0] + tmp[6]);
248  pOutVect[1] = (tmp[1] + tmp[7]);
249  pOutVect[2] = (tmp[1] - tmp[7]);
250  pOutVect[3] = (tmp[0] - tmp[6]);
251#else
252  uint8_t Index, k;
253  int32_t temp;
254  /*Calculate 4 subband samples by matrixing*/
255  for (Index = 0; Index < 4; Index++) {
256    temp = 0;
257    for (k = 0; k < 8; k++) {
258      /*temp += (int32_t)(((int64_t)M[(Index*strEncParams->numOfSubBands*2)+k] *
259       * Y[k]) >> 16 ); */
260      temp += (gas16AnalDCTcoeff4[(Index * 4 * 2) + k] * (pInVect[k] >> 16));
261      temp +=
262          ((gas16AnalDCTcoeff4[(Index * 4 * 2) + k] * (pInVect[k] & 0xFFFF)) >>
263           16);
264    }
265    pOutVect[Index] = temp;
266  }
267#endif
268}
269