1ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/******************************************************************************
2ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
35b790feeeb211c42bf78ca3ae9c26aa30e516765Jakub Pawlowski *  Copyright 2014 The Android Open Source Project
49ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson *  Copyright 2003 - 2004 Open Interface North America, Inc. All rights
59ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson *                        reserved.
6ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
7ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  Licensed under the Apache License, Version 2.0 (the "License");
8ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  you may not use this file except in compliance with the License.
9ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  You may obtain a copy of the License at:
10ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
11ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  http://www.apache.org/licenses/LICENSE-2.0
12ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
13ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  Unless required by applicable law or agreed to in writing, software
14ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  distributed under the License is distributed on an "AS IS" BASIS,
15ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  See the License for the specific language governing permissions and
17ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *  limitations under the License.
18ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
19ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta ******************************************************************************/
20ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
21ee96a3c60fca590d38025925c072d264e06493c4Myles Watson/*******************************************************************************
22ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta  $Revision: #1 $
23ee96a3c60fca590d38025925c072d264e06493c4Myles Watson ******************************************************************************/
24ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
25ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/** @file
26ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta@ingroup codec_internal
27ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta*/
28ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
/**@addtogroup codec_internal*/
30ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/**@{*/
31ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
32ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/*
33ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima
34ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * factorization. The scaling factors are folded into the windowing
35ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * constants. 29 adds and 5 16x32 multiplies per 8 samples.
36ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta */
37ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
38ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#include "oi_codec_sbc_private.h"
39ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
/*
 * Fixed-point AAN constants. Each one is stored in S1.30 format, i.e. the
 * real value listed next to it multiplied by 2^30.
 */
#define AAN_C4_FIX (759250125)  /* 0.707107 in S1.30 */
#define AAN_C6_FIX (410903207)  /* 0.382683 in S1.30 */
#define AAN_Q0_FIX (581104888)  /* 0.541196 in S1.30 */
#define AAN_Q1_FIX (1402911301) /* 1.306563 in S1.30 */

/**
 * Rounding right shift: adds half of 2^y to x, then shifts the sum right by
 * y bits. y must be at least 1, because the rounding term is 1 << (y - 1).
 * The definition is skipped when SCALE has already been defined.
 */
#ifndef SCALE
#define SCALE(x, y) (((x) + (1 << ((y)-1))) >> (y))
#endif
53ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
/**
 * Default C language implementation of a signed 32x32->64 multiply that keeps
 * only the upper 32 bits of the product. This function may be replaced by a
 * platform-specific version for speed.
 *
 * Both operands are widened to 64 bits before multiplying, so the product is
 * exact and cannot overflow. The upper half is then extracted with a right
 * shift; for negative products this relies on the compiler implementing >>
 * on signed values as an arithmetic shift.
 *
 * @param u A signed 32-bit multiplicand
 * @param v A signed 32-bit multiplier
 *
 * @return  A signed 32-bit value corresponding to the 32 most significant bits
 * of the 64-bit product of u and v.
 */
INLINE int32_t default_mul_32s_32s_hi(int32_t u, int32_t v) {
  const int64_t product = (int64_t)u * (int64_t)v;

  /* |product| <= 2^62, so the upper half always fits in an int32_t. */
  return (int32_t)(product >> 32);
}

#define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y)
81ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
#ifdef DEBUG_DCT
/*
 * Floating-point mirror of dct2_8(), built only when DEBUG_DCT is defined.
 * It repeats the same sequence of butterflies and constant multiplies in
 * double precision and, after every step, asserts that the intermediate value
 * satisfies VALID_INT32 (VALID_INT16 for the values written to out).
 *
 * NOTE(review): AAN_C4_FLOAT, AAN_C6_FLOAT, AAN_Q0_FLOAT, AAN_Q1_FLOAT and
 * floor() are neither defined nor included in this file; presumably they are
 * supplied by the project headers when DEBUG_DCT is enabled - confirm.
 *
 * @param out Receives the 8 scaled output coefficients as floats
 * @param in  The 8 input samples, divided by 2^DCTII_8_SHIFT_IN before use
 */
PRIVATE void float_dct2_8(float* RESTRICT out, int32_t const* RESTRICT in) {
/* Rounds x to the nearest multiple of 2^-bits. */
#define FIX(x, bits)                                       \
  (((int)floor(0.5f + ((x) * ((float)(1 << (bits)))))) /   \
   ((float)(1 << (bits))))
/* x <- x + y and y <- x - y (in terms of the values on entry), with range
 * checks on both results. */
#define FLOAT_BUTTERFLY(x, y)  \
  do {                         \
    (x) += (y);                \
    (y) = (x) - ((y) * 2);     \
    OI_ASSERT(VALID_INT32(x)); \
    OI_ASSERT(VALID_INT32(y)); \
  } while (0)
/* Multiplies sample by K after rounding K to 20 fractional bits. */
#define FLOAT_MULT_DCT(K, sample) (FIX(K, 20) * (sample))
#define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y))))

  double L00, L01, L02, L03, L04, L05, L06, L07;
  double L25;
  double x[8];
  int i;

  /* Apply the input scaling to all eight samples. */
  for (i = 0; i < 8; i++) {
    x[i] = FLOAT_SCALE(in[i], DCTII_8_SHIFT_IN);
    OI_ASSERT(VALID_INT32(x[i]));
  }

  /* First stage: sums feed the even outputs, differences the odd outputs. */
  L00 = (x[0] + x[7]);
  OI_ASSERT(VALID_INT32(L00));
  L01 = (x[1] + x[6]);
  OI_ASSERT(VALID_INT32(L01));
  L02 = (x[2] + x[5]);
  OI_ASSERT(VALID_INT32(L02));
  L03 = (x[3] + x[4]);
  OI_ASSERT(VALID_INT32(L03));

  L04 = (x[3] - x[4]);
  OI_ASSERT(VALID_INT32(L04));
  L05 = (x[2] - x[5]);
  OI_ASSERT(VALID_INT32(L05));
  L06 = (x[1] - x[6]);
  OI_ASSERT(VALID_INT32(L06));
  L07 = (x[0] - x[7]);
  OI_ASSERT(VALID_INT32(L07));

  /* Even half: produces out[0], out[4], out[6] and out[2]. */
  FLOAT_BUTTERFLY(L00, L03);
  FLOAT_BUTTERFLY(L01, L02);

  L02 += L03;
  OI_ASSERT(VALID_INT32(L02));

  L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02);
  OI_ASSERT(VALID_INT32(L02));

  FLOAT_BUTTERFLY(L00, L01);

  out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0);
  OI_ASSERT(VALID_INT16(out[0]));
  out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4);
  OI_ASSERT(VALID_INT16(out[4]));

  FLOAT_BUTTERFLY(L03, L02);
  out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6);
  OI_ASSERT(VALID_INT16(out[6]));
  out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2);
  OI_ASSERT(VALID_INT16(out[2]));

  /* Odd half: produces out[3], out[5], out[7] and out[1]. */
  L04 += L05;
  OI_ASSERT(VALID_INT32(L04));
  L05 += L06;
  OI_ASSERT(VALID_INT32(L05));
  L06 += L07;
  OI_ASSERT(VALID_INT32(L06));

  /* The odd half is halved here; the output shifts below are reduced by one
   * bit to compensate. */
  L04 /= 2;
  L05 /= 2;
  L06 /= 2;
  L07 /= 2;

  L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05);
  OI_ASSERT(VALID_INT32(L05));

  L25 = L06 - L04;
  OI_ASSERT(VALID_INT32(L25));
  L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25);
  OI_ASSERT(VALID_INT32(L25));

  L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04);
  OI_ASSERT(VALID_INT32(L04));
  L04 -= L25;
  OI_ASSERT(VALID_INT32(L04));

  L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06);
  OI_ASSERT(VALID_INT32(L06));
  L06 -= L25;
  /* Check the value that was just updated (L06), not L25 again. */
  OI_ASSERT(VALID_INT32(L06));

  FLOAT_BUTTERFLY(L07, L05);

  FLOAT_BUTTERFLY(L05, L04);
  out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3 - 1));
  OI_ASSERT(VALID_INT16(out[3]));
  out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5 - 1));
  OI_ASSERT(VALID_INT16(out[5]));

  FLOAT_BUTTERFLY(L07, L06);
  out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7 - 1));
  OI_ASSERT(VALID_INT16(out[7]));
  out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1 - 1));
  OI_ASSERT(VALID_INT16(out[1]));
}
/* Drop the helper macros that were defined for float_dct2_8 only. */
#undef FLOAT_SCALE
#undef FLOAT_MULT_DCT
#undef FLOAT_BUTTERFLY
#undef FIX
#endif
203ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
204ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/*
205ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * This function calculates the AAN DCT. Its inputs are in S16.15 format, as
206ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38
2079ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * (1244918057 integer). The function it computes is an approximation to the
2089ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * array defined by:
209ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
210ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * diag(aan_s) * AAN= C2
211ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
212ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *   or
213ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
214ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * AAN = diag(1/aan_s) * C2
215ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
216ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * where C2 is as it is defined in the comment at the head of this file, and
217ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
218ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * aan_s[i] = aan_s = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1;
219ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
220ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * aan_s[i] = [ 1.000  0.510  0.541  0.601  0.707  0.900  1.307  2.563 ]
221ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
222ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * The output ranges are shown as follows:
223ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
224ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Let Y[0..7] = AAN * X[0..7]
225ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
226ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Without loss of generality, assume the input vector X consists of elements
227ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * between -1 and 1. The maximum possible value of a given output element occurs
228ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * with some particular combination of input vector elements each of which is -1
2299ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[i]. Y
2309ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * is maximized if the sign of X[i] matches the sign of AAN[t,i], ensuring a
231ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * positive contribution to the sum. Equivalently, one may simply sum
232ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * abs(AAN)[t,i] over t to get the maximum possible value of Y[i].
233ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
2349ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * This yields approximately:
2359ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson *  [8.00  10.05   9.66   8.52   8.00   5.70   4.00   2.00]
236ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
237ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Given the maximum magnitude sensible input value of +/-37992, this yields the
238ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * following vector of maximum output magnitudes:
239ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
240ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * [ 303936  381820  367003  323692  303936  216555  151968   75984 ]
241ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
242ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Ultimately, these values must fit into 16 bit signed integers, so they must
243ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * be scaled. A non-uniform scaling helps maximize the kept precision. The
244ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * relative number of extra bits of precision maintainable with respect to the
245ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * largest value is given here:
246ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
247ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * [ 0  0  0  0  0  0  1  2 ]
248ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta *
249ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta */
250911d1ae03efec2d54c3b1b605589d790d1745488Myles WatsonPRIVATE void dct2_8(SBC_BUFFER_T* RESTRICT out, int32_t const* RESTRICT in) {
251911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define BUTTERFLY(x, y) \
252911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  x += (y);             \
253911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  (y) = (x) - ((y) << 1);
254911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K, x) << 2)
255ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
256911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  int32_t L00, L01, L02, L03, L04, L05, L06, L07;
257911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  int32_t L25;
258ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
259911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  int32_t in0, in1, in2, in3;
260911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  int32_t in4, in5, in6, in7;
261ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
262ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#if DCTII_8_SHIFT_IN != 0
263911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in0 = SCALE(in[0], DCTII_8_SHIFT_IN);
264911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in1 = SCALE(in[1], DCTII_8_SHIFT_IN);
265911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in2 = SCALE(in[2], DCTII_8_SHIFT_IN);
266911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in3 = SCALE(in[3], DCTII_8_SHIFT_IN);
267911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in4 = SCALE(in[4], DCTII_8_SHIFT_IN);
268911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in5 = SCALE(in[5], DCTII_8_SHIFT_IN);
269911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in6 = SCALE(in[6], DCTII_8_SHIFT_IN);
270911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in7 = SCALE(in[7], DCTII_8_SHIFT_IN);
271ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#else
272911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in0 = in[0];
273911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in1 = in[1];
274911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in2 = in[2];
275911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in3 = in[3];
276911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in4 = in[4];
277911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in5 = in[5];
278911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in6 = in[6];
279911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  in7 = in[7];
280ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#endif
281ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
282911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L00 = in0 + in7;
283911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L01 = in1 + in6;
284911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L02 = in2 + in5;
285911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L03 = in3 + in4;
286ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
287911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L04 = in3 - in4;
288911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L05 = in2 - in5;
289911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L06 = in1 - in6;
290911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L07 = in0 - in7;
291ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
292911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  BUTTERFLY(L00, L03);
293911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  BUTTERFLY(L01, L02);
294ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
295911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L02 += L03;
296ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
297911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L02 = FIX_MULT_DCT(AAN_C4_FIX, L02);
298ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
299911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  BUTTERFLY(L00, L01);
300ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
301911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  out[0] = (int16_t)SCALE(L00, DCTII_8_SHIFT_0);
302911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  out[4] = (int16_t)SCALE(L01, DCTII_8_SHIFT_4);
303ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
304911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  BUTTERFLY(L03, L02);
305911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  out[6] = (int16_t)SCALE(L02, DCTII_8_SHIFT_6);
306911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  out[2] = (int16_t)SCALE(L03, DCTII_8_SHIFT_2);
307ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
308911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L04 += L05;
309911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L05 += L06;
310911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L06 += L07;
311ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
312911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L04 /= 2;
313911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L05 /= 2;
314911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L06 /= 2;
315911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L07 /= 2;
316ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
317911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L05 = FIX_MULT_DCT(AAN_C4_FIX, L05);
318ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
319911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L25 = L06 - L04;
320911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L25 = FIX_MULT_DCT(AAN_C6_FIX, L25);
321ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
322911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04);
323911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L04 -= L25;
324ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
325911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06);
326911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  L06 -= L25;
327ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
328911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  BUTTERFLY(L07, L05);
329ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
330911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  BUTTERFLY(L05, L04);
331911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  out[3] = (int16_t)SCALE(L04, DCTII_8_SHIFT_3 - 1);
332911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  out[5] = (int16_t)SCALE(L05, DCTII_8_SHIFT_5 - 1);
333ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
334911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  BUTTERFLY(L07, L06);
335911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  out[7] = (int16_t)SCALE(L06, DCTII_8_SHIFT_7 - 1);
336911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  out[1] = (int16_t)SCALE(L07, DCTII_8_SHIFT_1 - 1);
337ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#undef BUTTERFLY
338ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
339ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#ifdef DEBUG_DCT
340911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  {
341911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson    float float_out[8];
342911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson    float_dct2_8(float_out, in);
343911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson  }
344ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#endif
345ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta}
346ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta
347ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/**@}*/
348