1/******************************************************************************
2 *
3 *  Copyright (C) 2014 The Android Open Source Project
4 *  Copyright 2003 - 2004 Open Interface North America, Inc. All rights reserved.
5 *
6 *  Licensed under the Apache License, Version 2.0 (the "License");
7 *  you may not use this file except in compliance with the License.
8 *  You may obtain a copy of the License at:
9 *
10 *  http://www.apache.org/licenses/LICENSE-2.0
11 *
12 *  Unless required by applicable law or agreed to in writing, software
13 *  distributed under the License is distributed on an "AS IS" BASIS,
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 *  See the License for the specific language governing permissions and
16 *  limitations under the License.
17 *
18 ******************************************************************************/
19
20/**********************************************************************************
21  $Revision: #1 $
22***********************************************************************************/
23
24/** @file
25@ingroup codec_internal
26*/
27
/**@addtogroup codec_internal*/
29/**@{*/
30
31/*
32 * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima
33 * factorization. The scaling factors are folded into the windowing
34 * constants. 29 adds and 5 16x32 multiplies per 8 samples.
35 */
36
37#include "oi_codec_sbc_private.h"
38
/*
 * Multiplier constants for the AAN butterfly network, stored as S1.30
 * fixed-point integers (real value * 2^30, rounded to nearest).
 */

/* 0.707107, numerically cos(4*pi/16) */
#define AAN_C4_FIX (759250125)

/* 0.382683, numerically cos(6*pi/16) */
#define AAN_C6_FIX (410903207)

/* 0.541196, numerically cos(2*pi/16) - cos(6*pi/16) */
#define AAN_Q0_FIX (581104888)

/* 1.306563, numerically cos(2*pi/16) + cos(6*pi/16) */
#define AAN_Q1_FIX (1402911301)

/**
 * Shifts x right by y bits after adding half of the discarded range
 * (1 << (y - 1)), so the result is rounded instead of truncated.
 *
 * y must be at least 1 because the expansion shifts by (y) - 1. Both
 * arguments are evaluated more than once, so they must be free of side
 * effects. The definition is skipped if SCALE already exists.
 */
#ifndef SCALE
#define SCALE(x, y) (((x) + (1 << ((y) - 1))) >> (y))
#endif
52
53/**
54 * Default C language implementation of a 32x32->32 multiply. This function may
55 * be replaced by a platform-specific version for speed.
56 *
57 * @param u A signed 32-bit multiplicand
58 * @param v A signed 32-bit multiplier
59
60 * @return  A signed 32-bit value corresponding to the 32 most significant bits
61 * of the 64-bit product of u and v.
62 */
63INLINE OI_INT32 default_mul_32s_32s_hi(OI_INT32 u, OI_INT32 v)
64{
65    OI_UINT32 u0, v0;
66    OI_INT32 u1, v1, w1, w2, t;
67
68    u0 = u & 0xFFFF; u1 = u >> 16;
69    v0 = v & 0xFFFF; v1 = v >> 16;
70    t = u0*v0;
71    t = u1*v0 + ((OI_UINT32)t >> 16);
72    w1 = t & 0xFFFF;
73    w2 = t >> 16;
74    w1 = u0*v1 + w1;
75    return u1*v1 + w2 + (w1 >> 16);
76}
77
78#define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y)
79
80
#ifdef DEBUG_DCT
/**
 * Floating-point counterpart of dct2_8(), compiled only when DEBUG_DCT is
 * defined. It follows the same sequence of butterflies, multiplies and output
 * scalings as the fixed-point routine, but carries every intermediate value
 * in a double and asserts after each step that the value would still fit the
 * integer width used by the fixed-point path.
 *
 * NOTE(review): AAN_C4_FLOAT, AAN_C6_FLOAT, AAN_Q0_FLOAT and AAN_Q1_FLOAT are
 * not defined in this file, and floor() needs <math.h>; presumably both come
 * in through oi_codec_sbc_private.h -- confirm before enabling DEBUG_DCT.
 *
 * @param out Receives the 8 transform outputs, out[0]..out[7].
 * @param in  The 8 input samples, in[0]..in[7].
 */
PRIVATE void float_dct2_8(float * RESTRICT out, OI_INT32 const *RESTRICT in)
{
/* Rounds x to the nearest multiple of 2^-bits and returns it as a float. */
#define FIX(x, bits) (((int)floor(0.5f + ((x) * ((float)(1 << (bits)))))) / ((float)(1 << (bits))))
/* x <- x + y, y <- x_old - y_old, with range checks on both results. */
#define FLOAT_BUTTERFLY(x, y)          \
    do {                               \
        (x) += (y);                    \
        (y) = (x) - ((y) * 2);         \
        OI_ASSERT(VALID_INT32(x));     \
        OI_ASSERT(VALID_INT32(y));     \
    } while (0)
/* Multiplies sample by constant K after quantizing K to 20 fractional bits. */
#define FLOAT_MULT_DCT(K, sample) (FIX(K, 20) * (sample))
/* Floating-point equivalent of a right shift by y bits (no rounding needed). */
#define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y))))

    double L00,L01,L02,L03,L04,L05,L06,L07;
    double L25;

    double in0,in1,in2,in3;
    double in4,in5,in6,in7;

    /* Input pre-scaling. */
    in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in0));
    in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in1));
    in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in2));
    in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in3));
    in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in4));
    in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in5));
    in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in6));
    in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in7));

    /* First stage: sums feed the even outputs, differences the odd outputs. */
    L00 = (in0 + in7); OI_ASSERT(VALID_INT32(L00));
    L01 = (in1 + in6); OI_ASSERT(VALID_INT32(L01));
    L02 = (in2 + in5); OI_ASSERT(VALID_INT32(L02));
    L03 = (in3 + in4); OI_ASSERT(VALID_INT32(L03));

    L04 = (in3 - in4); OI_ASSERT(VALID_INT32(L04));
    L05 = (in2 - in5); OI_ASSERT(VALID_INT32(L05));
    L06 = (in1 - in6); OI_ASSERT(VALID_INT32(L06));
    L07 = (in0 - in7); OI_ASSERT(VALID_INT32(L07));

    /* Even half: outputs 0, 4, 2, 6. */
    FLOAT_BUTTERFLY(L00, L03);
    FLOAT_BUTTERFLY(L01, L02);

    L02 += L03; OI_ASSERT(VALID_INT32(L02));

    L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02); OI_ASSERT(VALID_INT32(L02));

    FLOAT_BUTTERFLY(L00, L01);

    out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0); OI_ASSERT(VALID_INT16(out[0]));
    out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4); OI_ASSERT(VALID_INT16(out[4]));

    FLOAT_BUTTERFLY(L03, L02);
    out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6); OI_ASSERT(VALID_INT16(out[6]));
    out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2); OI_ASSERT(VALID_INT16(out[2]));

    /* Odd half: outputs 1, 3, 5, 7. */
    L04 += L05; OI_ASSERT(VALID_INT32(L04));
    L05 += L06; OI_ASSERT(VALID_INT32(L05));
    L06 += L07; OI_ASSERT(VALID_INT32(L06));

    /* The odd path is halved here; the output shifts below are reduced by
     * one bit to compensate. */
    L04/=2;
    L05/=2;
    L06/=2;
    L07/=2;

    L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05); OI_ASSERT(VALID_INT32(L05));

    L25 = L06 - L04; OI_ASSERT(VALID_INT32(L25));
    L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25); OI_ASSERT(VALID_INT32(L25));

    L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04); OI_ASSERT(VALID_INT32(L04));
    L04 -= L25; OI_ASSERT(VALID_INT32(L04));

    L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06); OI_ASSERT(VALID_INT32(L06));
    /* Check the value that was just modified (L06), not the subtrahend. */
    L06 -= L25; OI_ASSERT(VALID_INT32(L06));

    FLOAT_BUTTERFLY(L07, L05);

    FLOAT_BUTTERFLY(L05, L04);
    out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3-1)); OI_ASSERT(VALID_INT16(out[3]));
    out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5-1)); OI_ASSERT(VALID_INT16(out[5]));

    FLOAT_BUTTERFLY(L07, L06);
    out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7-1)); OI_ASSERT(VALID_INT16(out[7]));
    out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1-1)); OI_ASSERT(VALID_INT16(out[1]));
}
/* Drop the helper macros defined inside the function so they do not leak. */
#undef FIX
#undef FLOAT_BUTTERFLY
#undef FLOAT_MULT_DCT
#undef FLOAT_SCALE
#endif
162
163
164/*
165 * This function calculates the AAN DCT. Its inputs are in S16.15 format, as
166 * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38
167 * (1244918057 integer). The function it computes is an approximation to the array defined
168 * by:
169 *
170 * diag(aan_s) * AAN= C2
171 *
172 *   or
173 *
174 * AAN = diag(1/aan_s) * C2
175 *
176 * where C2 is as it is defined in the comment at the head of this file, and
177 *
178 * aan_s[i] = aan_s = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1;
179 *
180 * aan_s[i] = [ 1.000  0.510  0.541  0.601  0.707  0.900  1.307  2.563 ]
181 *
182 * The output ranges are shown as follows:
183 *
184 * Let Y[0..7] = AAN * X[0..7]
185 *
186 * Without loss of generality, assume the input vector X consists of elements
187 * between -1 and 1. The maximum possible value of a given output element occurs
188 * with some particular combination of input vector elements each of which is -1
189 * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[i]. Y is
190 * maximized if the sign of X[i] matches the sign of AAN[t,i], ensuring a
191 * positive contribution to the sum. Equivalently, one may simply sum
192 * abs(AAN)[t,i] over t to get the maximum possible value of Y[i].
193 *
194 * This yields approximately [8.00  10.05   9.66   8.52   8.00   5.70   4.00   2.00]
195 *
196 * Given the maximum magnitude sensible input value of +/-37992, this yields the
197 * following vector of maximum output magnitudes:
198 *
199 * [ 303936  381820  367003  323692  303936  216555  151968   75984 ]
200 *
201 * Ultimately, these values must fit into 16 bit signed integers, so they must
202 * be scaled. A non-uniform scaling helps maximize the kept precision. The
203 * relative number of extra bits of precision maintainable with respect to the
204 * largest value is given here:
205 *
206 * [ 0  0  0  0  0  0  1  2 ]
207 *
208 */
209PRIVATE void dct2_8(SBC_BUFFER_T * RESTRICT out, OI_INT32 const *RESTRICT in)
210{
211#define BUTTERFLY(x,y) x += y; y = x - (y<<1);
212#define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K,x)<<2)
213
214    OI_INT32 L00,L01,L02,L03,L04,L05,L06,L07;
215    OI_INT32 L25;
216
217    OI_INT32 in0,in1,in2,in3;
218    OI_INT32 in4,in5,in6,in7;
219
220#if DCTII_8_SHIFT_IN != 0
221    in0 = SCALE(in[0], DCTII_8_SHIFT_IN);
222    in1 = SCALE(in[1], DCTII_8_SHIFT_IN);
223    in2 = SCALE(in[2], DCTII_8_SHIFT_IN);
224    in3 = SCALE(in[3], DCTII_8_SHIFT_IN);
225    in4 = SCALE(in[4], DCTII_8_SHIFT_IN);
226    in5 = SCALE(in[5], DCTII_8_SHIFT_IN);
227    in6 = SCALE(in[6], DCTII_8_SHIFT_IN);
228    in7 = SCALE(in[7], DCTII_8_SHIFT_IN);
229#else
230    in0 = in[0];
231    in1 = in[1];
232    in2 = in[2];
233    in3 = in[3];
234    in4 = in[4];
235    in5 = in[5];
236    in6 = in[6];
237    in7 = in[7];
238#endif
239
240    L00 = in0 + in7;
241    L01 = in1 + in6;
242    L02 = in2 + in5;
243    L03 = in3 + in4;
244
245    L04 = in3 - in4;
246    L05 = in2 - in5;
247    L06 = in1 - in6;
248    L07 = in0 - in7;
249
250    BUTTERFLY(L00, L03);
251    BUTTERFLY(L01, L02);
252
253    L02 += L03;
254
255    L02 = FIX_MULT_DCT(AAN_C4_FIX, L02);
256
257    BUTTERFLY(L00, L01);
258
259    out[0] = (OI_INT16)SCALE(L00, DCTII_8_SHIFT_0);
260    out[4] = (OI_INT16)SCALE(L01, DCTII_8_SHIFT_4);
261
262    BUTTERFLY(L03, L02);
263    out[6] = (OI_INT16)SCALE(L02, DCTII_8_SHIFT_6);
264    out[2] = (OI_INT16)SCALE(L03, DCTII_8_SHIFT_2);
265
266    L04 += L05;
267    L05 += L06;
268    L06 += L07;
269
270    L04/=2;
271    L05/=2;
272    L06/=2;
273    L07/=2;
274
275    L05 = FIX_MULT_DCT(AAN_C4_FIX, L05);
276
277    L25 = L06 - L04;
278    L25 = FIX_MULT_DCT(AAN_C6_FIX, L25);
279
280    L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04);
281    L04 -= L25;
282
283    L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06);
284    L06 -= L25;
285
286    BUTTERFLY(L07, L05);
287
288    BUTTERFLY(L05, L04);
289    out[3] = (OI_INT16)SCALE(L04, DCTII_8_SHIFT_3-1);
290    out[5] = (OI_INT16)SCALE(L05, DCTII_8_SHIFT_5-1);
291
292    BUTTERFLY(L07, L06);
293    out[7] = (OI_INT16)SCALE(L06, DCTII_8_SHIFT_7-1);
294    out[1] = (OI_INT16)SCALE(L07, DCTII_8_SHIFT_1-1);
295#undef BUTTERFLY
296
297#ifdef DEBUG_DCT
298    {
299        float float_out[8];
300        float_dct2_8(float_out, in);
301    }
302#endif
303}
304
305/**@}*/
306