1ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/****************************************************************************** 2ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 35b790feeeb211c42bf78ca3ae9c26aa30e516765Jakub Pawlowski * Copyright 2014 The Android Open Source Project 49ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * Copyright 2003 - 2004 Open Interface North America, Inc. All rights 59ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * reserved. 6ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 7ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Licensed under the Apache License, Version 2.0 (the "License"); 8ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * you may not use this file except in compliance with the License. 9ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * You may obtain a copy of the License at: 10ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 11ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * http://www.apache.org/licenses/LICENSE-2.0 12ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 13ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Unless required by applicable law or agreed to in writing, software 14ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * distributed under the License is distributed on an "AS IS" BASIS, 15ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * See the License for the specific language governing permissions and 17ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * limitations under the License. 18ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 19ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta ******************************************************************************/ 20ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 21ee96a3c60fca590d38025925c072d264e06493c4Myles Watson/******************************************************************************* 22ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta $Revision: #1 $ 23ee96a3c60fca590d38025925c072d264e06493c4Myles Watson ******************************************************************************/ 24ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 25ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/** @file 26ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta@ingroup codec_internal 27ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta*/ 28ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 29ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/**@addgroup codec_internal*/ 30ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/**@{*/ 31ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 32ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/* 33ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima 34ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * factorization. The scaling factors are folded into the windowing 35ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * constants. 29 adds and 5 16x32 multiplies per 8 samples. 36ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta */ 37ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 38ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#include "oi_codec_sbc_private.h" 39ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 40911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define AAN_C4_FIX (759250125) /* S1.30 759250125 0.707107*/ 41ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 42911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define AAN_C6_FIX (410903207) /* S1.30 410903207 0.382683*/ 43ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 44911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define AAN_Q0_FIX (581104888) /* S1.30 581104888 0.541196*/ 45ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 46911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define AAN_Q1_FIX (1402911301) /* S1.30 1402911301 1.306563*/ 47ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 48ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/** Scales x by y bits to the right, adding a rounding factor. 49ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta */ 50ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#ifndef SCALE 51911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define SCALE(x, y) (((x) + (1 << ((y)-1))) >> (y)) 52ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#endif 53ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 54ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/** 55ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Default C language implementation of a 32x32->32 multiply. This function may 56ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * be replaced by a platform-specific version for speed. 57ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 58ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * @param u A signed 32-bit multiplicand 59ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * @param v A signed 32-bit multiplier 60ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 61ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * @return A signed 32-bit value corresponding to the 32 most significant bits 62ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * of the 64-bit product of u and v. 63ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta */ 64911d1ae03efec2d54c3b1b605589d790d1745488Myles WatsonINLINE int32_t default_mul_32s_32s_hi(int32_t u, int32_t v) { 65911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson uint32_t u0, v0; 66911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson int32_t u1, v1, w1, w2, t; 67911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 68911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson u0 = u & 0xFFFF; 69911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson u1 = u >> 16; 70911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson v0 = v & 0xFFFF; 71911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson v1 = v >> 16; 72911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson t = u0 * v0; 73911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson t = u1 * v0 + ((uint32_t)t >> 16); 74911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson w1 = t & 0xFFFF; 75911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson w2 = t >> 16; 76911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson w1 = u0 * v1 + w1; 77911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson return u1 * v1 + w2 + (w1 >> 16); 78ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta} 79ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 80ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y) 81ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 82ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#ifdef DEBUG_DCT 83911d1ae03efec2d54c3b1b605589d790d1745488Myles WatsonPRIVATE void float_dct2_8(float* RESTRICT out, int32_t const* RESTRICT in) { 84911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define FIX(x, bits) \ 85911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson (((int)floor(0.5f + ((x) * ((float)(1 << bits))))) / ((float)(1 << bits))) 86911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define FLOAT_BUTTERFLY(x, y) \ 87911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson x += y; \ 88911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson y = x - (y * 2); \ 89911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(x)); \ 90911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(y)); 91911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define FLOAT_MULT_DCT(K, sample) (FIX(K, 20) * sample) 92ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y)))) 93ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 94911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson double L00, L01, L02, L03, L04, L05, L06, L07; 95911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson double L25; 96911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 97911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson double in0, in1, in2, in3; 98911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson double in4, in5, in6, in7; 99911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 100911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN); 101911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(in0)); 102911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN); 103911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(in1)); 104911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN); 105911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(in2)); 106911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN); 107911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(in3)); 108911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN); 109911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(in4)); 110911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN); 111911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(in5)); 112911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN); 113911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(in6)); 114911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN); 115911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(in7)); 116911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 117911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L00 = (in0 + in7); 118911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L00)); 119911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L01 = (in1 + in6); 120911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L01)); 121911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L02 = (in2 + in5); 122911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L02)); 123911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L03 = (in3 + in4); 124911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L03)); 125911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 126911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 = (in3 - in4); 127911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L04)); 128911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L05 = (in2 - in5); 129911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L05)); 130911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 = (in1 - in6); 131911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L06)); 132911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L07 = (in0 - in7); 133911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L07)); 134911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 135911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson FLOAT_BUTTERFLY(L00, L03); 136911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson FLOAT_BUTTERFLY(L01, L02); 137911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 138911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L02 += L03; 139911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L02)); 140911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 141911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02); 142911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L02)); 143911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 144911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson FLOAT_BUTTERFLY(L00, L01); 145911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 146911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0); 147911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT16(out[0])); 148911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4); 149911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT16(out[4])); 150911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 151911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson FLOAT_BUTTERFLY(L03, L02); 152911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6); 153911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT16(out[6])); 154911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2); 155911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT16(out[2])); 156911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 157911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 += L05; 158911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L04)); 159911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L05 += L06; 160911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L05)); 161911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 += L07; 162911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L06)); 163911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 164911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 /= 2; 165911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L05 /= 2; 166911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 /= 2; 167911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L07 /= 2; 168911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 169911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05); 170911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L05)); 171911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 172911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L25 = L06 - L04; 173911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L25)); 174911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25); 175911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L25)); 176911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 177911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04); 178911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L04)); 179911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 -= L25; 180911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L04)); 181911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 182911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06); 183911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L06)); 184911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 -= L25; 185911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT32(L25)); 186911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 187911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson FLOAT_BUTTERFLY(L07, L05); 188911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 189911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson FLOAT_BUTTERFLY(L05, L04); 190911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3 - 1)); 191911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT16(out[3])); 192911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5 - 1)); 193911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT16(out[5])); 194911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson 195911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson FLOAT_BUTTERFLY(L07, L06); 196911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7 - 1)); 197911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT16(out[7])); 198911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1 - 1)); 199911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson OI_ASSERT(VALID_INT16(out[1])); 200ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta} 201ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#undef BUTTERFLY 202ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#endif 203ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 204ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/* 205ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * This function calculates the AAN DCT. Its inputs are in S16.15 format, as 206ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38 2079ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * (1244918057 integer). The function it computes is an approximation to the 2089ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * array defined by: 209ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 210ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * diag(aan_s) * AAN= C2 211ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 212ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * or 213ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 214ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * AAN = diag(1/aan_s) * C2 215ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 216ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * where C2 is as it is defined in the comment at the head of this file, and 217ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 218ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * aan_s[i] = aan_s = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1; 219ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 220ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * aan_s[i] = [ 1.000 0.510 0.541 0.601 0.707 0.900 1.307 2.563 ] 221ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 222ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * The output ranges are shown as follows: 223ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 224ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Let Y[0..7] = AAN * X[0..7] 225ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 226ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Without loss of generality, assume the input vector X consists of elements 227ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * between -1 and 1. The maximum possible value of a given output element occurs 228ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * with some particular combination of input vector elements each of which is -1 2299ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[i]. Y 2309ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * is maximized if the sign of X[i] matches the sign of AAN[t,i], ensuring a 231ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * positive contribution to the sum. Equivalently, one may simply sum 232ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * abs(AAN)[t,i] over t to get the maximum possible value of Y[i]. 233ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 2349ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * This yields approximately: 2359ca07091a1f07ea201cee0504dab6a1d7073d429Myles Watson * [8.00 10.05 9.66 8.52 8.00 5.70 4.00 2.00] 236ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 237ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Given the maximum magnitude sensible input value of +/-37992, this yields the 238ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * following vector of maximum output magnitudes: 239ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 240ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * [ 303936 381820 367003 323692 303936 216555 151968 75984 ] 241ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 242ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * Ultimately, these values must fit into 16 bit signed integers, so they must 243ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * be scaled. A non-uniform scaling helps maximize the kept precision. The 244ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * relative number of extra bits of precision maintainable with respect to the 245ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * largest value is given here: 246ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 247ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * [ 0 0 0 0 0 0 1 2 ] 248ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta * 249ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta */ 250911d1ae03efec2d54c3b1b605589d790d1745488Myles WatsonPRIVATE void dct2_8(SBC_BUFFER_T* RESTRICT out, int32_t const* RESTRICT in) { 251911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define BUTTERFLY(x, y) \ 252911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson x += (y); \ 253911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson (y) = (x) - ((y) << 1); 254911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson#define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K, x) << 2) 255ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 256911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson int32_t L00, L01, L02, L03, L04, L05, L06, L07; 257911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson int32_t L25; 258ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 259911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson int32_t in0, in1, in2, in3; 260911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson int32_t in4, in5, in6, in7; 261ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 262ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#if DCTII_8_SHIFT_IN != 0 263911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in0 = SCALE(in[0], DCTII_8_SHIFT_IN); 264911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in1 = SCALE(in[1], DCTII_8_SHIFT_IN); 265911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in2 = SCALE(in[2], DCTII_8_SHIFT_IN); 266911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in3 = SCALE(in[3], DCTII_8_SHIFT_IN); 267911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in4 = SCALE(in[4], DCTII_8_SHIFT_IN); 268911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in5 = SCALE(in[5], DCTII_8_SHIFT_IN); 269911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in6 = SCALE(in[6], DCTII_8_SHIFT_IN); 270911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in7 = SCALE(in[7], DCTII_8_SHIFT_IN); 271ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#else 272911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in0 = in[0]; 273911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in1 = in[1]; 274911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in2 = in[2]; 275911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in3 = in[3]; 276911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in4 = in[4]; 277911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in5 = in[5]; 278911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in6 = in[6]; 279911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson in7 = in[7]; 280ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#endif 281ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 282911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L00 = in0 + in7; 283911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L01 = in1 + in6; 284911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L02 = in2 + in5; 285911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L03 = in3 + in4; 286ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 287911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 = in3 - in4; 288911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L05 = in2 - in5; 289911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 = in1 - in6; 290911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L07 = in0 - in7; 291ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 292911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson BUTTERFLY(L00, L03); 293911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson BUTTERFLY(L01, L02); 294ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 295911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L02 += L03; 296ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 297911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L02 = FIX_MULT_DCT(AAN_C4_FIX, L02); 298ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 299911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson BUTTERFLY(L00, L01); 300ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 301911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[0] = (int16_t)SCALE(L00, DCTII_8_SHIFT_0); 302911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[4] = (int16_t)SCALE(L01, DCTII_8_SHIFT_4); 303ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 304911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson BUTTERFLY(L03, L02); 305911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[6] = (int16_t)SCALE(L02, DCTII_8_SHIFT_6); 306911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[2] = (int16_t)SCALE(L03, DCTII_8_SHIFT_2); 307ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 308911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 += L05; 309911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L05 += L06; 310911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 += L07; 311ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 312911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 /= 2; 313911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L05 /= 2; 314911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 /= 2; 315911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L07 /= 2; 316ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 317911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L05 = FIX_MULT_DCT(AAN_C4_FIX, L05); 318ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 319911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L25 = L06 - L04; 320911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L25 = FIX_MULT_DCT(AAN_C6_FIX, L25); 321ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 322911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04); 323911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L04 -= L25; 324ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 325911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06); 326911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson L06 -= L25; 327ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 328911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson BUTTERFLY(L07, L05); 329ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 330911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson BUTTERFLY(L05, L04); 331911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[3] = (int16_t)SCALE(L04, DCTII_8_SHIFT_3 - 1); 332911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[5] = (int16_t)SCALE(L05, DCTII_8_SHIFT_5 - 1); 333ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 334911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson BUTTERFLY(L07, L06); 335911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[7] = (int16_t)SCALE(L06, DCTII_8_SHIFT_7 - 1); 336911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson out[1] = (int16_t)SCALE(L07, DCTII_8_SHIFT_1 - 1); 337ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#undef BUTTERFLY 338ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 339ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#ifdef DEBUG_DCT 340911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson { 341911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson float float_out[8]; 342911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson float_dct2_8(float_out, in); 343911d1ae03efec2d54c3b1b605589d790d1745488Myles Watson } 344ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta#endif 345ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta} 346ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta 347ce24765fe7620c34e8d88ed4f826c8a6917582b2Hemant Gupta/**@}*/ 348