1ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* 2ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 4ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Use of this source code is governed by a BSD-style license 5ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * that can be found in the LICENSE file in the root of the source 6ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * tree. An additional intellectual property rights grant can be found 7ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * in the file PATENTS. All contributing project authors may 8ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * be found in the AUTHORS file in the root of the source tree. 9ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 11ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include <assert.h> 12ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include <math.h> 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "./vpx_config.h" 155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vp9_rtcd.h" 16ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 17ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_blockd.h" 18ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_idct.h" 195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_systemdependent.h" 20ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianstatic INLINE int fdct_round_shift(int input) { 222ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); 232ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian assert(INT16_MIN <= rv && rv <= INT16_MAX); 242ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian return rv; 252ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian} 265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void fdct4(const int16_t *input, int16_t *output) { 28ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t step[4]; 29ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp1, temp2; 30ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 31ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[0] = input[0] + input[3]; 32ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[1] = input[1] + input[2]; 33ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[2] = input[1] - input[2]; 34ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[3] = input[0] - input[3]; 35ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 36ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step[0] + step[1]) * cospi_16_64; 37ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step[0] - step[1]) * cospi_16_64; 382ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[0] = fdct_round_shift(temp1); 392ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[2] = fdct_round_shift(temp2); 40ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; 41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; 422ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[1] = fdct_round_shift(temp1); 432ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[3] = fdct_round_shift(temp2); 44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 45ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 46ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid vp9_fdct4x4_1_c(const int16_t *input, int16_t *output, int stride) { 47ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int r, c; 48ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int16_t sum = 0; 49ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (r = 0; r < 4; ++r) 50ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (c = 0; c < 4; ++c) 51ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian sum += input[r * stride + c]; 52ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 53ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian output[0] = sum << 1; 54ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian output[1] = 0; 55ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 56ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { 58ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // The 2D transform is done with two passes which are actually pretty 59ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // similar. In the first one, we transform the columns and transpose 60ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // the results. In the second one, we transform the rows. To achieve that, 612ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // as the first pass results are transposed, we transpose the columns (that 62ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // is the transposed rows) and transpose the results (so that it goes back 63ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // in normal/row positions). 64ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int pass; 65ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // We need an intermediate buffer between passes. 66ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t intermediate[4 * 4]; 675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *in = input; 68ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *out = intermediate; 69ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Do the two transform/transpose passes 70ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (pass = 0; pass < 2; ++pass) { 71ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int input[4]; 72ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int step[4]; 73ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*needs32*/ int temp1, temp2; 74ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i; 75ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; ++i) { 76ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Load inputs. 77ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (0 == pass) { 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[0] = in[0 * stride] * 16; 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[1] = in[1 * stride] * 16; 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[2] = in[2 * stride] * 16; 815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[3] = in[3 * stride] * 16; 82ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (i == 0 && input[0]) { 83ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[0] += 1; 84ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 85ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } else { 86ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[0] = in[0 * 4]; 87ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[1] = in[1 * 4]; 88ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[2] = in[2 * 4]; 89ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[3] = in[3 * 4]; 90ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Transform. 92ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[0] = input[0] + input[3]; 93ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[1] = input[1] + input[2]; 94ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[2] = input[1] - input[2]; 95ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[3] = input[0] - input[3]; 96ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step[0] + step[1]) * cospi_16_64; 97ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step[0] - step[1]) * cospi_16_64; 982ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[0] = fdct_round_shift(temp1); 992ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[2] = fdct_round_shift(temp2); 100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; 101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; 1022ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[1] = fdct_round_shift(temp1); 1032ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[3] = fdct_round_shift(temp2); 104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Do next column (which is a transposed row in second/horizontal pass) 105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang in++; 106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out += 4; 107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Setup in/out for next pass. 109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang in = intermediate; 110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out = output; 111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang { 114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; ++i) { 116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 4; ++j) 117ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[j + i * 4] = (output[j + i * 4] + 1) >> 2; 118ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 119ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void fadst4(const int16_t *input, int16_t *output) { 123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x0, x1, x2, x3; 124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int s0, s1, s2, s3, s4, s5, s6, s7; 125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = input[0]; 127ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = input[1]; 128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = input[2]; 129ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = input[3]; 130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (!(x0 | x1 | x2 | x3)) { 132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = output[1] = output[2] = output[3] = 0; 133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return; 134ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 136ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = sinpi_1_9 * x0; 137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = sinpi_4_9 * x0; 138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = sinpi_2_9 * x1; 139ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = sinpi_1_9 * x1; 140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = sinpi_3_9 * x2; 141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = sinpi_4_9 * x3; 142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = sinpi_2_9 * x3; 143ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = x0 + x1 - x3; 144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s2 + s5; 146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = sinpi_3_9 * s7; 147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s1 - s3 + s6; 148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s4; 149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0 + x3; 151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x1; 152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2 - x3; 153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x2 - x0 + x3; 154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // 1-D transform scaling factor is sqrt(2). 1562ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[0] = fdct_round_shift(s0); 1572ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[1] = fdct_round_shift(s1); 1582ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[2] = fdct_round_shift(s2); 1592ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[3] = fdct_round_shift(s3); 160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic const transform_2d FHT_4[] = { 1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fdct4, fdct4 }, // DCT_DCT = 0 1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fadst4, fdct4 }, // ADST_DCT = 1 1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fdct4, fadst4 }, // DCT_ADST = 2 1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fadst4, fadst4 } // ADST_ADST = 3 167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}; 168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1692ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianvoid vp9_fht4x4_c(const int16_t *input, int16_t *output, 1702ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int stride, int tx_type) { 1712ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (tx_type == DCT_DCT) { 1722ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vp9_fdct4x4_c(input, output, stride); 1732ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } else { 1742ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t out[4 * 4]; 1752ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t *outptr = &out[0]; 1762ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int i, j; 1772ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t temp_in[4], temp_out[4]; 1782ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const transform_2d ht = FHT_4[tx_type]; 179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1802ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Columns 1812ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < 4; ++i) { 1822ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 4; ++j) 1832ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian temp_in[j] = input[j * stride + i] * 16; 1842ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (i == 0 && temp_in[0]) 1852ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian temp_in[0] += 1; 1862ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian ht.cols(temp_in, temp_out); 1872ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 4; ++j) 1882ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian outptr[j * 4 + i] = temp_out[j]; 1892ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1912ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Rows 1922ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < 4; ++i) { 1932ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 4; ++j) 1942ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian temp_in[j] = out[j + i * 4]; 1952ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian ht.rows(temp_in, temp_out); 1962ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 4; ++j) 1972ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[j + i * 4] = (temp_out[j] + 1) >> 2; 1982ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void fdct8(const int16_t *input, int16_t *output) { 203ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; 204ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*needs32*/ int t0, t1, t2, t3; 205ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int x0, x1, x2, x3; 206ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 207ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 208ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = input[0] + input[7]; 209ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = input[1] + input[6]; 210ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = input[2] + input[5]; 211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = input[3] + input[4]; 212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = input[3] - input[4]; 213ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = input[2] - input[5]; 214ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = input[1] - input[6]; 215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = input[0] - input[7]; 216ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // fdct4(step, step); 218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s3; 219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s2; 220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s1 - s2; 221ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s0 - s3; 222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = (x0 + x1) * cospi_16_64; 223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = (x0 - x1) * cospi_16_64; 224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t2 = x2 * cospi_24_64 + x3 * cospi_8_64; 225ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; 2262ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[0] = fdct_round_shift(t0); 2272ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[2] = fdct_round_shift(t2); 2282ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[4] = fdct_round_shift(t1); 2292ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[6] = fdct_round_shift(t3); 230ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 2 232ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = (s6 - s5) * cospi_16_64; 233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = (s6 + s5) * cospi_16_64; 2342ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian t2 = fdct_round_shift(t0); 2352ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian t3 = fdct_round_shift(t1); 236ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 237ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 3 238ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s4 + t2; 239ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s4 - t2; 240ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s7 - t3; 241ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s7 + t3; 242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 4 244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = x0 * cospi_28_64 + x3 * cospi_4_64; 245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = x1 * cospi_12_64 + x2 * cospi_20_64; 246ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; 247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; 2482ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[1] = fdct_round_shift(t0); 2492ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[3] = fdct_round_shift(t2); 2502ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[5] = fdct_round_shift(t1); 2512ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[7] = fdct_round_shift(t3); 252ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 253ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 254ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid vp9_fdct8x8_1_c(const int16_t *input, int16_t *output, int stride) { 255ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int r, c; 256ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int16_t sum = 0; 257ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (r = 0; r < 8; ++r) 258ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (c = 0; c < 8; ++c) 259ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian sum += input[r * stride + c]; 260ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 261ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian output[0] = sum; 262ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian output[1] = 0; 263ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 264ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { 266ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 267ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t intermediate[64]; 268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 269ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Transform columns 270ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang { 271ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *output = intermediate; 272ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; 273ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*needs32*/ int t0, t1, t2, t3; 274ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int x0, x1, x2, x3; 275ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 276ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i; 277ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 8; i++) { 278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s0 = (input[0 * stride] + input[7 * stride]) * 4; 2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s1 = (input[1 * stride] + input[6 * stride]) * 4; 2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s2 = (input[2 * stride] + input[5 * stride]) * 4; 2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s3 = (input[3 * stride] + input[4 * stride]) * 4; 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s4 = (input[3 * stride] - input[4 * stride]) * 4; 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s5 = (input[2 * stride] - input[5 * stride]) * 4; 2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s6 = (input[1 * stride] - input[6 * stride]) * 4; 2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s7 = (input[0 * stride] - input[7 * stride]) * 4; 2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // fdct4(step, step); 289ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s3; 290ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s2; 291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s1 - s2; 292ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s0 - s3; 293ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = (x0 + x1) * cospi_16_64; 294ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = (x0 - x1) * cospi_16_64; 295ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t2 = x2 * cospi_24_64 + x3 * cospi_8_64; 296ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; 2972ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[0 * 8] = fdct_round_shift(t0); 2982ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[2 * 8] = fdct_round_shift(t2); 2992ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[4 * 8] = fdct_round_shift(t1); 3002ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[6 * 8] = fdct_round_shift(t3); 301ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 302ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 2 303ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = (s6 - s5) * cospi_16_64; 304ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = (s6 + s5) * cospi_16_64; 3052ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian t2 = fdct_round_shift(t0); 3062ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian t3 = fdct_round_shift(t1); 307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 308ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 3 309ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s4 + t2; 310ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s4 - t2; 311ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s7 - t3; 312ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s7 + t3; 313ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 314ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 4 315ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = x0 * cospi_28_64 + x3 * cospi_4_64; 316ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = x1 * cospi_12_64 + x2 * cospi_20_64; 317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; 318ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; 3192ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[1 * 8] = fdct_round_shift(t0); 3202ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[3 * 8] = fdct_round_shift(t2); 3212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[5 * 8] = fdct_round_shift(t1); 3222ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[7 * 8] = fdct_round_shift(t3); 323ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input++; 324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output++; 325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 326ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Rows 329ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 8; ++i) { 3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang fdct8(&intermediate[i * 8], &final_output[i * 8]); 331ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 8; ++j) 332ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang final_output[j + i * 8] /= 2; 333ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 334ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 336ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid vp9_fdct16x16_1_c(const int16_t *input, int16_t *output, int stride) { 337ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int r, c; 338ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int16_t sum = 0; 339ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (r = 0; r < 16; ++r) 340ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (c = 0; c < 16; ++c) 341ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian sum += input[r * stride + c]; 342ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 343ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian output[0] = sum >> 1; 344ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian output[1] = 0; 345ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 346ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { 348ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // The 2D transform is done with two passes which are actually pretty 349ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // similar. In the first one, we transform the columns and transpose 350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // the results. In the second one, we transform the rows. To achieve that, 3512ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // as the first pass results are transposed, we transpose the columns (that 352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // is the transposed rows) and transpose the results (so that it goes back 353ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // in normal/row positions). 354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int pass; 355ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // We need an intermediate buffer between passes. 356ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t intermediate[256]; 3575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *in = input; 358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *out = intermediate; 359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Do the two transform/transpose passes 360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (pass = 0; pass < 2; ++pass) { 361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int step1[8]; 362ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int step2[8]; 363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int step3[8]; 364ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int input[8]; 365ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*needs32*/ int temp1, temp2; 366ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i; 367ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 16; i++) { 368ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (0 == pass) { 369ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Calculate input for the first 8 results. 3705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[0] = (in[0 * stride] + in[15 * stride]) * 4; 3715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[1] = (in[1 * stride] + in[14 * stride]) * 4; 3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[2] = (in[2 * stride] + in[13 * stride]) * 4; 3735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[3] = (in[3 * stride] + in[12 * stride]) * 4; 3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[4] = (in[4 * stride] + in[11 * stride]) * 4; 3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[5] = (in[5 * stride] + in[10 * stride]) * 4; 3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[6] = (in[6 * stride] + in[ 9 * stride]) * 4; 3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input[7] = (in[7 * stride] + in[ 8 * stride]) * 4; 378ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Calculate input for the next 8 results. 3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step1[0] = (in[7 * stride] - in[ 8 * stride]) * 4; 3805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step1[1] = (in[6 * stride] - in[ 9 * stride]) * 4; 3815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step1[2] = (in[5 * stride] - in[10 * stride]) * 4; 3825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step1[3] = (in[4 * stride] - in[11 * stride]) * 4; 3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step1[4] = (in[3 * stride] - in[12 * stride]) * 4; 3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step1[5] = (in[2 * stride] - in[13 * stride]) * 4; 3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step1[6] = (in[1 * stride] - in[14 * stride]) * 4; 3865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step1[7] = (in[0 * stride] - in[15 * stride]) * 4; 387ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } else { 388ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Calculate input for the first 8 results. 389ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2); 390ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2); 391ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2); 392ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2); 393ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2); 394ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2); 395ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[6] = ((in[6 * 16] + 1) >> 2) + ((in[ 9 * 16] + 1) >> 2); 396ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[7] = ((in[7 * 16] + 1) >> 2) + ((in[ 8 * 16] + 1) >> 2); 397ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Calculate input for the next 8 results. 398ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[ 8 * 16] + 1) >> 2); 399ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[ 9 * 16] + 1) >> 2); 400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2); 401ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2); 402ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2); 403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2); 404ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2); 405ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2); 406ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // Work on the first eight values; fdct8(input, even_results); 408ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang { 409ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; 410ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*needs32*/ int t0, t1, t2, t3; 411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int x0, x1, x2, x3; 412ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 413ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 414ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = input[0] + input[7]; 415ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = input[1] + input[6]; 416ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = input[2] + input[5]; 417ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = input[3] + input[4]; 418ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = input[3] - input[4]; 419ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = input[2] - input[5]; 420ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = input[1] - input[6]; 421ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = input[0] - input[7]; 422ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 4235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // fdct4(step, step); 424ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s3; 425ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s2; 426ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s1 - s2; 427ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s0 - s3; 428ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = (x0 + x1) * cospi_16_64; 429ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = (x0 - x1) * cospi_16_64; 430ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t2 = x3 * cospi_8_64 + x2 * cospi_24_64; 431ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t3 = x3 * cospi_24_64 - x2 * cospi_8_64; 4322ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[0] = fdct_round_shift(t0); 4332ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[4] = fdct_round_shift(t2); 4342ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[8] = fdct_round_shift(t1); 4352ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[12] = fdct_round_shift(t3); 436ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 437ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 2 438ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = (s6 - s5) * cospi_16_64; 439ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = (s6 + s5) * cospi_16_64; 4402ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian t2 = fdct_round_shift(t0); 4412ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian t3 = fdct_round_shift(t1); 442ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 443ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 3 444ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s4 + t2; 445ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s4 - t2; 446ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s7 - t3; 447ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s7 + t3; 448ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 449ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 4 450ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = x0 * cospi_28_64 + x3 * cospi_4_64; 451ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = x1 * cospi_12_64 + x2 * cospi_20_64; 452ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; 453ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; 4542ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[2] = fdct_round_shift(t0); 4552ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[6] = fdct_round_shift(t2); 4562ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[10] = fdct_round_shift(t1); 4572ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[14] = fdct_round_shift(t3); 458ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 459ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Work on the next eight values; step1 -> odd_results 460ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang { 461ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 2 462ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step1[5] - step1[2]) * cospi_16_64; 463ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[4] - step1[3]) * cospi_16_64; 4642ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[2] = fdct_round_shift(temp1); 4652ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[3] = fdct_round_shift(temp2); 466ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step1[4] + step1[3]) * cospi_16_64; 467ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[5] + step1[2]) * cospi_16_64; 4682ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[4] = fdct_round_shift(temp1); 4692ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[5] = fdct_round_shift(temp2); 470ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 3 471ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[0] = step1[0] + step2[3]; 472ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[1] = step1[1] + step2[2]; 473ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[2] = step1[1] - step2[2]; 474ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[3] = step1[0] - step2[3]; 475ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[4] = step1[7] - step2[4]; 476ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[5] = step1[6] - step2[5]; 477ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[6] = step1[6] + step2[5]; 478ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[7] = step1[7] + step2[4]; 479ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 4 480ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; 481ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; 4822ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[1] = fdct_round_shift(temp1); 4832ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[2] = fdct_round_shift(temp2); 484ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; 485ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; 4862ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[5] = fdct_round_shift(temp1); 4872ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[6] = fdct_round_shift(temp2); 488ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 5 489ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = step3[0] + step2[1]; 490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = step3[0] - step2[1]; 491ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian step1[2] = step3[3] + step2[2]; 492ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian step1[3] = step3[3] - step2[2]; 493ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian step1[4] = step3[4] - step2[5]; 494ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian step1[5] = step3[4] + step2[5]; 495ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = step3[7] - step2[6]; 496ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = step3[7] + step2[6]; 497ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 6 498ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; 499ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; 5002ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[1] = fdct_round_shift(temp1); 5012ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[9] = fdct_round_shift(temp2); 502ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; 503ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; 5042ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[5] = fdct_round_shift(temp1); 5052ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[13] = fdct_round_shift(temp2); 506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; 507ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; 5082ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[3] = fdct_round_shift(temp1); 5092ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[11] = fdct_round_shift(temp2); 510ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; 511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; 5122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[7] = fdct_round_shift(temp1); 5132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[15] = fdct_round_shift(temp2); 514ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 515ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Do next column (which is a transposed row in second/horizontal pass) 516ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang in++; 517ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out += 16; 518ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 519ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Setup in/out for next pass. 520ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang in = intermediate; 521ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out = output; 522ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 523ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 524ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 5255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void fadst8(const int16_t *input, int16_t *output) { 526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int s0, s1, s2, s3, s4, s5, s6, s7; 527ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 528ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x0 = input[7]; 529ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x1 = input[0]; 530ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x2 = input[5]; 531ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x3 = input[2]; 532ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x4 = input[3]; 533ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x5 = input[4]; 534ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x6 = input[1]; 535ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x7 = input[6]; 536ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 537ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 538ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = cospi_2_64 * x0 + cospi_30_64 * x1; 539ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = cospi_30_64 * x0 - cospi_2_64 * x1; 540ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = cospi_10_64 * x2 + cospi_22_64 * x3; 541ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = cospi_22_64 * x2 - cospi_10_64 * x3; 542ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = cospi_18_64 * x4 + cospi_14_64 * x5; 543ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = cospi_14_64 * x4 - cospi_18_64 * x5; 544ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = cospi_26_64 * x6 + cospi_6_64 * x7; 545ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = cospi_6_64 * x6 - cospi_26_64 * x7; 546ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 5472ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x0 = fdct_round_shift(s0 + s4); 5482ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x1 = fdct_round_shift(s1 + s5); 5492ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x2 = fdct_round_shift(s2 + s6); 5502ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x3 = fdct_round_shift(s3 + s7); 5512ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x4 = fdct_round_shift(s0 - s4); 5522ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x5 = fdct_round_shift(s1 - s5); 5532ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x6 = fdct_round_shift(s2 - s6); 5542ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x7 = fdct_round_shift(s3 - s7); 555ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 556ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 557ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0; 558ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x1; 559ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2; 560ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x3; 561ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = cospi_8_64 * x4 + cospi_24_64 * x5; 562ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = cospi_24_64 * x4 - cospi_8_64 * x5; 563ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = - cospi_24_64 * x6 + cospi_8_64 * x7; 564ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = cospi_8_64 * x6 + cospi_24_64 * x7; 565ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 566ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s2; 567ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s3; 568ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s0 - s2; 569ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s1 - s3; 5702ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x4 = fdct_round_shift(s4 + s6); 5712ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x5 = fdct_round_shift(s5 + s7); 5722ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x6 = fdct_round_shift(s4 - s6); 5732ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x7 = fdct_round_shift(s5 - s7); 574ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 575ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 3 576ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = cospi_16_64 * (x2 + x3); 577ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = cospi_16_64 * (x2 - x3); 578ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = cospi_16_64 * (x6 + x7); 579ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = cospi_16_64 * (x6 - x7); 580ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 5812ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x2 = fdct_round_shift(s2); 5822ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x3 = fdct_round_shift(s3); 5832ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x6 = fdct_round_shift(s6); 5842ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x7 = fdct_round_shift(s7); 585ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 586ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = x0; 587ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = - x4; 588ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = x6; 589ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = - x2; 590ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = x3; 591ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = - x7; 592ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = x5; 593ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = - x1; 594ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 595ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 596ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic const transform_2d FHT_8[] = { 5975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fdct8, fdct8 }, // DCT_DCT = 0 5985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fadst8, fdct8 }, // ADST_DCT = 1 5995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fdct8, fadst8 }, // DCT_ADST = 2 6005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fadst8, fadst8 } // ADST_ADST = 3 601ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}; 602ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 6032ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianvoid vp9_fht8x8_c(const int16_t *input, int16_t *output, 6042ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int stride, int tx_type) { 6052ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (tx_type == DCT_DCT) { 6062ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vp9_fdct8x8_c(input, output, stride); 6072ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } else { 6082ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t out[64]; 6092ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t *outptr = &out[0]; 6102ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int i, j; 6112ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t temp_in[8], temp_out[8]; 6122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const transform_2d ht = FHT_8[tx_type]; 6132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 6142ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Columns 6152ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < 8; ++i) { 6162ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 8; ++j) 6172ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian temp_in[j] = input[j * stride + i] * 4; 6182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian ht.cols(temp_in, temp_out); 6192ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 8; ++j) 6202ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian outptr[j * 8 + i] = temp_out[j]; 6212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 622ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 6232ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Rows 6242ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < 8; ++i) { 6252ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 8; ++j) 6262ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian temp_in[j] = out[j + i * 8]; 6272ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian ht.rows(temp_in, temp_out); 6282ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 8; ++j) 6292ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; 6302ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 631ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 632ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 633ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 634ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per 635ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang pixel. */ 6365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_fwht4x4_c(const int16_t *input, int16_t *output, int stride) { 637ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i; 638ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int a1, b1, c1, d1, e1; 6395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *ip = input; 6405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t *op = output; 641ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 642ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; i++) { 6435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang a1 = ip[0 * stride]; 6445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang b1 = ip[1 * stride]; 6455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang c1 = ip[2 * stride]; 6465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang d1 = ip[3 * stride]; 647ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 648ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 += b1; 649ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 = d1 - c1; 650ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang e1 = (a1 - d1) >> 1; 651ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang b1 = e1 - b1; 652ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang c1 = e1 - c1; 653ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 -= c1; 654ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 += b1; 655ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[0] = a1; 656ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[4] = c1; 657ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[8] = d1; 658ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[12] = b1; 659ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 660ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ip++; 661ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op++; 662ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 663ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ip = output; 664ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op = output; 665ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 666ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; i++) { 667ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 = ip[0]; 668ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang b1 = ip[1]; 669ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang c1 = ip[2]; 670ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 = ip[3]; 671ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 672ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 += b1; 673ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 -= c1; 674ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang e1 = (a1 - d1) >> 1; 675ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang b1 = e1 - b1; 676ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang c1 = e1 - c1; 677ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 -= c1; 678ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 += b1; 6795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang op[0] = a1 * UNIT_QUANT_FACTOR; 6805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang op[1] = c1 * UNIT_QUANT_FACTOR; 6815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang op[2] = d1 * UNIT_QUANT_FACTOR; 6825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang op[3] = b1 * UNIT_QUANT_FACTOR; 683ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 684ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ip += 4; 685ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op += 4; 686ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 687ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 688ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 689ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang// Rewrote to use same algorithm as others. 6905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void fdct16(const int16_t in[16], int16_t out[16]) { 691ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int step1[8]; 692ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int step2[8]; 693ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int step3[8]; 694ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int input[8]; 695ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*needs32*/ int temp1, temp2; 696ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 697ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 1 698ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[0] = in[0] + in[15]; 699ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[1] = in[1] + in[14]; 700ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[2] = in[2] + in[13]; 701ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[3] = in[3] + in[12]; 702ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[4] = in[4] + in[11]; 703ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[5] = in[5] + in[10]; 704ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[6] = in[6] + in[ 9]; 705ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input[7] = in[7] + in[ 8]; 706ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 707ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = in[7] - in[ 8]; 708ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = in[6] - in[ 9]; 709ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = in[5] - in[10]; 710ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = in[4] - in[11]; 711ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = in[3] - in[12]; 712ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = in[2] - in[13]; 713ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = in[1] - in[14]; 714ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = in[0] - in[15]; 715ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 7165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // fdct8(step, step); 717ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang { 718ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; 719ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*needs32*/ int t0, t1, t2, t3; 720ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang /*canbe16*/ int x0, x1, x2, x3; 721ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 722ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 723ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = input[0] + input[7]; 724ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = input[1] + input[6]; 725ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = input[2] + input[5]; 726ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = input[3] + input[4]; 727ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = input[3] - input[4]; 728ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = input[2] - input[5]; 729ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = input[1] - input[6]; 730ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = input[0] - input[7]; 731ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 7325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // fdct4(step, step); 733ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s3; 734ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s2; 735ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s1 - s2; 736ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s0 - s3; 737ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = (x0 + x1) * cospi_16_64; 738ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = (x0 - x1) * cospi_16_64; 739ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t2 = x3 * cospi_8_64 + x2 * cospi_24_64; 740ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t3 = x3 * cospi_24_64 - x2 * cospi_8_64; 7412ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[0] = fdct_round_shift(t0); 7422ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[4] = fdct_round_shift(t2); 7432ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[8] = fdct_round_shift(t1); 7442ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[12] = fdct_round_shift(t3); 745ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 746ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 2 747ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = (s6 - s5) * cospi_16_64; 748ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = (s6 + s5) * cospi_16_64; 7492ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian t2 = fdct_round_shift(t0); 7502ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian t3 = fdct_round_shift(t1); 751ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 752ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 3 753ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s4 + t2; 754ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s4 - t2; 755ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s7 - t3; 756ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s7 + t3; 757ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 758ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 4 759ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t0 = x0 * cospi_28_64 + x3 * cospi_4_64; 760ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t1 = x1 * cospi_12_64 + x2 * cospi_20_64; 761ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; 762ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; 7632ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[2] = fdct_round_shift(t0); 7642ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[6] = fdct_round_shift(t2); 7652ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[10] = fdct_round_shift(t1); 7662ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[14] = fdct_round_shift(t3); 767ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 768ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 769ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 2 770ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step1[5] - step1[2]) * cospi_16_64; 771ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[4] - step1[3]) * cospi_16_64; 7722ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[2] = fdct_round_shift(temp1); 7732ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[3] = fdct_round_shift(temp2); 774ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step1[4] + step1[3]) * cospi_16_64; 775ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[5] + step1[2]) * cospi_16_64; 7762ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[4] = fdct_round_shift(temp1); 7772ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[5] = fdct_round_shift(temp2); 778ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 779ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 3 780ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[0] = step1[0] + step2[3]; 781ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[1] = step1[1] + step2[2]; 782ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[2] = step1[1] - step2[2]; 783ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[3] = step1[0] - step2[3]; 784ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[4] = step1[7] - step2[4]; 785ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[5] = step1[6] - step2[5]; 786ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[6] = step1[6] + step2[5]; 787ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step3[7] = step1[7] + step2[4]; 788ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 789ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 4 790ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; 791ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; 7922ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[1] = fdct_round_shift(temp1); 7932ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[2] = fdct_round_shift(temp2); 794ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; 795ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; 7962ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[5] = fdct_round_shift(temp1); 7972ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian step2[6] = fdct_round_shift(temp2); 798ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 799ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 5 800ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = step3[0] + step2[1]; 801ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = step3[0] - step2[1]; 802ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian step1[2] = step3[3] + step2[2]; 803ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian step1[3] = step3[3] - step2[2]; 804ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian step1[4] = step3[4] - step2[5]; 805ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian step1[5] = step3[4] + step2[5]; 806ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = step3[7] - step2[6]; 807ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = step3[7] + step2[6]; 808ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 809ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // step 6 810ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; 811ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; 8122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[1] = fdct_round_shift(temp1); 8132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[9] = fdct_round_shift(temp2); 814ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 815ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; 816ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; 8172ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[5] = fdct_round_shift(temp1); 8182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[13] = fdct_round_shift(temp2); 819ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 820ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; 821ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; 8222ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[3] = fdct_round_shift(temp1); 8232ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[11] = fdct_round_shift(temp2); 824ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 825ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; 826ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; 8272ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[7] = fdct_round_shift(temp1); 8282ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian out[15] = fdct_round_shift(temp2); 829ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 830ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 8315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void fadst16(const int16_t *input, int16_t *output) { 832ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; 833ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 834ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x0 = input[15]; 835ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x1 = input[0]; 836ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x2 = input[13]; 837ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x3 = input[2]; 838ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x4 = input[11]; 839ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x5 = input[4]; 840ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x6 = input[9]; 841ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x7 = input[6]; 842ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x8 = input[7]; 843ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x9 = input[8]; 844ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x10 = input[5]; 845ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x11 = input[10]; 846ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x12 = input[3]; 847ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x13 = input[12]; 848ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x14 = input[1]; 849ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x15 = input[14]; 850ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 851ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 852ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0 * cospi_1_64 + x1 * cospi_31_64; 853ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x0 * cospi_31_64 - x1 * cospi_1_64; 854ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2 * cospi_5_64 + x3 * cospi_27_64; 855ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x2 * cospi_27_64 - x3 * cospi_5_64; 856ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = x4 * cospi_9_64 + x5 * cospi_23_64; 857ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = x4 * cospi_23_64 - x5 * cospi_9_64; 858ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = x6 * cospi_13_64 + x7 * cospi_19_64; 859ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = x6 * cospi_19_64 - x7 * cospi_13_64; 860ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s8 = x8 * cospi_17_64 + x9 * cospi_15_64; 861ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s9 = x8 * cospi_15_64 - x9 * cospi_17_64; 862ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s10 = x10 * cospi_21_64 + x11 * cospi_11_64; 863ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s11 = x10 * cospi_11_64 - x11 * cospi_21_64; 864ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s12 = x12 * cospi_25_64 + x13 * cospi_7_64; 865ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s13 = x12 * cospi_7_64 - x13 * cospi_25_64; 866ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s14 = x14 * cospi_29_64 + x15 * cospi_3_64; 867ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s15 = x14 * cospi_3_64 - x15 * cospi_29_64; 868ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 8692ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x0 = fdct_round_shift(s0 + s8); 8702ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x1 = fdct_round_shift(s1 + s9); 8712ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x2 = fdct_round_shift(s2 + s10); 8722ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x3 = fdct_round_shift(s3 + s11); 8732ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x4 = fdct_round_shift(s4 + s12); 8742ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x5 = fdct_round_shift(s5 + s13); 8752ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x6 = fdct_round_shift(s6 + s14); 8762ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x7 = fdct_round_shift(s7 + s15); 8772ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x8 = fdct_round_shift(s0 - s8); 8782ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x9 = fdct_round_shift(s1 - s9); 8792ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x10 = fdct_round_shift(s2 - s10); 8802ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x11 = fdct_round_shift(s3 - s11); 8812ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x12 = fdct_round_shift(s4 - s12); 8822ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x13 = fdct_round_shift(s5 - s13); 8832ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x14 = fdct_round_shift(s6 - s14); 8842ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x15 = fdct_round_shift(s7 - s15); 885ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 886ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 887ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0; 888ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x1; 889ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2; 890ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x3; 891ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = x4; 892ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = x5; 893ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = x6; 894ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = x7; 895ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s8 = x8 * cospi_4_64 + x9 * cospi_28_64; 896ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s9 = x8 * cospi_28_64 - x9 * cospi_4_64; 897ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s10 = x10 * cospi_20_64 + x11 * cospi_12_64; 898ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s11 = x10 * cospi_12_64 - x11 * cospi_20_64; 899ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; 900ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s13 = x12 * cospi_4_64 + x13 * cospi_28_64; 901ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; 902ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s15 = x14 * cospi_20_64 + x15 * cospi_12_64; 903ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 904ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s4; 905ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s5; 906ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s2 + s6; 907ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s3 + s7; 908ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x4 = s0 - s4; 909ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x5 = s1 - s5; 910ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x6 = s2 - s6; 911ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x7 = s3 - s7; 9122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x8 = fdct_round_shift(s8 + s12); 9132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x9 = fdct_round_shift(s9 + s13); 9142ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x10 = fdct_round_shift(s10 + s14); 9152ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x11 = fdct_round_shift(s11 + s15); 9162ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x12 = fdct_round_shift(s8 - s12); 9172ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x13 = fdct_round_shift(s9 - s13); 9182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x14 = fdct_round_shift(s10 - s14); 9192ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x15 = fdct_round_shift(s11 - s15); 920ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 921ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 3 922ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0; 923ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x1; 924ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2; 925ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x3; 926ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = x4 * cospi_8_64 + x5 * cospi_24_64; 927ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = x4 * cospi_24_64 - x5 * cospi_8_64; 928ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; 929ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = x6 * cospi_8_64 + x7 * cospi_24_64; 930ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s8 = x8; 931ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s9 = x9; 932ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s10 = x10; 933ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s11 = x11; 934ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s12 = x12 * cospi_8_64 + x13 * cospi_24_64; 935ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s13 = x12 * cospi_24_64 - x13 * cospi_8_64; 936ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; 937ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s15 = x14 * cospi_8_64 + x15 * cospi_24_64; 938ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 939ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s2; 940ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s3; 941ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s0 - s2; 942ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s1 - s3; 9432ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x4 = fdct_round_shift(s4 + s6); 9442ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x5 = fdct_round_shift(s5 + s7); 9452ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x6 = fdct_round_shift(s4 - s6); 9462ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x7 = fdct_round_shift(s5 - s7); 947ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x8 = s8 + s10; 948ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x9 = s9 + s11; 949ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x10 = s8 - s10; 950ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x11 = s9 - s11; 9512ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x12 = fdct_round_shift(s12 + s14); 9522ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x13 = fdct_round_shift(s13 + s15); 9532ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x14 = fdct_round_shift(s12 - s14); 9542ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x15 = fdct_round_shift(s13 - s15); 955ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 956ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 4 957ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = (- cospi_16_64) * (x2 + x3); 958ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = cospi_16_64 * (x2 - x3); 959ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = cospi_16_64 * (x6 + x7); 960ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = cospi_16_64 * (- x6 + x7); 961ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s10 = cospi_16_64 * (x10 + x11); 962ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s11 = cospi_16_64 * (- x10 + x11); 963ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s14 = (- cospi_16_64) * (x14 + x15); 964ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s15 = cospi_16_64 * (x14 - x15); 965ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 9662ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x2 = fdct_round_shift(s2); 9672ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x3 = fdct_round_shift(s3); 9682ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x6 = fdct_round_shift(s6); 9692ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x7 = fdct_round_shift(s7); 9702ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x10 = fdct_round_shift(s10); 9712ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x11 = fdct_round_shift(s11); 9722ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x14 = fdct_round_shift(s14); 9732ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian x15 = fdct_round_shift(s15); 974ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 975ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = x0; 976ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = - x8; 977ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = x12; 978ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = - x4; 979ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = x6; 980ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = x14; 981ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = x10; 982ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = x2; 983ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[8] = x3; 984ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[9] = x11; 985ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[10] = x15; 986ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[11] = x7; 987ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[12] = x5; 988ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[13] = - x13; 989ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[14] = x9; 990ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[15] = - x1; 991ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 992ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 993ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic const transform_2d FHT_16[] = { 9945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fdct16, fdct16 }, // DCT_DCT = 0 9955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fadst16, fdct16 }, // ADST_DCT = 1 9965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fdct16, fadst16 }, // DCT_ADST = 2 9975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { fadst16, fadst16 } // ADST_ADST = 3 998ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}; 999ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 10002ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianvoid vp9_fht16x16_c(const int16_t *input, int16_t *output, 10012ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int stride, int tx_type) { 10022ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian if (tx_type == DCT_DCT) { 10032ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian vp9_fdct16x16_c(input, output, stride); 10042ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } else { 10052ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t out[256]; 10062ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t *outptr = &out[0]; 10072ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int i, j; 10082ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian int16_t temp_in[16], temp_out[16]; 10092ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian const transform_2d ht = FHT_16[tx_type]; 10102ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian 10112ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Columns 10122ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < 16; ++i) { 10132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 16; ++j) 10142ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian temp_in[j] = input[j * stride + i] * 4; 10152ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian ht.cols(temp_in, temp_out); 10162ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 16; ++j) 10172ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; 10182ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 1019ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 10202ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian // Rows 10212ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (i = 0; i < 16; ++i) { 10222ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 16; ++j) 10232ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian temp_in[j] = out[j + i * 16]; 10242ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian ht.rows(temp_in, temp_out); 10252ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian for (j = 0; j < 16; ++j) 10262ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian output[j + i * 16] = temp_out[j]; 10272ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian } 1028ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 1029ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1030ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1031ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic INLINE int dct_32_round(int input) { 1032ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); 1033ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang assert(-131072 <= rv && rv <= 131071); 1034ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return rv; 1035ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1036ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1037ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic INLINE int half_round_shift(int input) { 1038ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int rv = (input + 1 + (input < 0)) >> 2; 1039ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return rv; 1040ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1041ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 10422ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanianstatic void fdct32(const int *input, int *output, int round) { 1043ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int step[32]; 1044ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 1 1045ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[0] = input[0] + input[(32 - 1)]; 1046ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[1] = input[1] + input[(32 - 2)]; 1047ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[2] = input[2] + input[(32 - 3)]; 1048ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[3] = input[3] + input[(32 - 4)]; 1049ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[4] = input[4] + input[(32 - 5)]; 1050ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[5] = input[5] + input[(32 - 6)]; 1051ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[6] = input[6] + input[(32 - 7)]; 1052ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[7] = input[7] + input[(32 - 8)]; 1053ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[8] = input[8] + input[(32 - 9)]; 1054ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[9] = input[9] + input[(32 - 10)]; 1055ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[10] = input[10] + input[(32 - 11)]; 1056ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[11] = input[11] + input[(32 - 12)]; 1057ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[12] = input[12] + input[(32 - 13)]; 1058ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[13] = input[13] + input[(32 - 14)]; 1059ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[14] = input[14] + input[(32 - 15)]; 1060ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[15] = input[15] + input[(32 - 16)]; 1061ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[16] = -input[16] + input[(32 - 17)]; 1062ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[17] = -input[17] + input[(32 - 18)]; 1063ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[18] = -input[18] + input[(32 - 19)]; 1064ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[19] = -input[19] + input[(32 - 20)]; 1065ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[20] = -input[20] + input[(32 - 21)]; 1066ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[21] = -input[21] + input[(32 - 22)]; 1067ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[22] = -input[22] + input[(32 - 23)]; 1068ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[23] = -input[23] + input[(32 - 24)]; 1069ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[24] = -input[24] + input[(32 - 25)]; 1070ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[25] = -input[25] + input[(32 - 26)]; 1071ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[26] = -input[26] + input[(32 - 27)]; 1072ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[27] = -input[27] + input[(32 - 28)]; 1073ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[28] = -input[28] + input[(32 - 29)]; 1074ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[29] = -input[29] + input[(32 - 30)]; 1075ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[30] = -input[30] + input[(32 - 31)]; 1076ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[31] = -input[31] + input[(32 - 32)]; 1077ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1078ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 2 1079ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = step[0] + step[16 - 1]; 1080ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = step[1] + step[16 - 2]; 1081ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = step[2] + step[16 - 3]; 1082ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = step[3] + step[16 - 4]; 1083ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = step[4] + step[16 - 5]; 1084ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = step[5] + step[16 - 6]; 1085ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = step[6] + step[16 - 7]; 1086ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = step[7] + step[16 - 8]; 1087ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[8] = -step[8] + step[16 - 9]; 1088ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[9] = -step[9] + step[16 - 10]; 1089ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[10] = -step[10] + step[16 - 11]; 1090ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[11] = -step[11] + step[16 - 12]; 1091ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[12] = -step[12] + step[16 - 13]; 1092ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[13] = -step[13] + step[16 - 14]; 1093ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[14] = -step[14] + step[16 - 15]; 1094ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[15] = -step[15] + step[16 - 16]; 1095ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1096ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[16] = step[16]; 1097ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[17] = step[17]; 1098ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[18] = step[18]; 1099ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[19] = step[19]; 1100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64); 1102ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[21] = dct_32_round((-step[21] + step[26]) * cospi_16_64); 1103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64); 1104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64); 1105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64); 1107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64); 1108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64); 1109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64); 1110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[28] = step[28]; 1112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[29] = step[29]; 1113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[30] = step[30]; 1114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[31] = step[31]; 1115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 11161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang // dump the magnitude by 4, hence the intermediate values are within 11171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang // the range of 16 bits. 11181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang if (round) { 11191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[0] = half_round_shift(output[0]); 11201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[1] = half_round_shift(output[1]); 11211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[2] = half_round_shift(output[2]); 11221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[3] = half_round_shift(output[3]); 11231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[4] = half_round_shift(output[4]); 11241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[5] = half_round_shift(output[5]); 11251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[6] = half_round_shift(output[6]); 11261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[7] = half_round_shift(output[7]); 11271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[8] = half_round_shift(output[8]); 11281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[9] = half_round_shift(output[9]); 11291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[10] = half_round_shift(output[10]); 11301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[11] = half_round_shift(output[11]); 11311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[12] = half_round_shift(output[12]); 11321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[13] = half_round_shift(output[13]); 11331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[14] = half_round_shift(output[14]); 11341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[15] = half_round_shift(output[15]); 11351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang 11361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[16] = half_round_shift(output[16]); 11371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[17] = half_round_shift(output[17]); 11381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[18] = half_round_shift(output[18]); 11391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[19] = half_round_shift(output[19]); 11401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[20] = half_round_shift(output[20]); 11411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[21] = half_round_shift(output[21]); 11421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[22] = half_round_shift(output[22]); 11431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[23] = half_round_shift(output[23]); 11441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[24] = half_round_shift(output[24]); 11451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[25] = half_round_shift(output[25]); 11461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[26] = half_round_shift(output[26]); 11471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[27] = half_round_shift(output[27]); 11481184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[28] = half_round_shift(output[28]); 11491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[29] = half_round_shift(output[29]); 11501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[30] = half_round_shift(output[30]); 11511184aebb761cbeac9124c37189a80a1a58f04b6bhkuang output[31] = half_round_shift(output[31]); 11521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang } 11531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang 1154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 3 1155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[0] = output[0] + output[(8 - 1)]; 1156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[1] = output[1] + output[(8 - 2)]; 1157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[2] = output[2] + output[(8 - 3)]; 1158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[3] = output[3] + output[(8 - 4)]; 1159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[4] = -output[4] + output[(8 - 5)]; 1160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[5] = -output[5] + output[(8 - 6)]; 1161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[6] = -output[6] + output[(8 - 7)]; 1162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[7] = -output[7] + output[(8 - 8)]; 1163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[8] = output[8]; 1164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[9] = output[9]; 1165ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64); 1166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64); 1167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64); 1168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64); 1169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[14] = output[14]; 1170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[15] = output[15]; 1171ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[16] = output[16] + output[23]; 1173ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[17] = output[17] + output[22]; 1174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[18] = output[18] + output[21]; 1175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[19] = output[19] + output[20]; 1176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[20] = -output[20] + output[19]; 1177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[21] = -output[21] + output[18]; 1178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[22] = -output[22] + output[17]; 1179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[23] = -output[23] + output[16]; 1180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[24] = -output[24] + output[31]; 1181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[25] = -output[25] + output[30]; 1182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[26] = -output[26] + output[29]; 1183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[27] = -output[27] + output[28]; 1184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[28] = output[28] + output[27]; 1185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[29] = output[29] + output[26]; 1186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[30] = output[30] + output[25]; 1187ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[31] = output[31] + output[24]; 1188ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1189ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 4 1190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = step[0] + step[3]; 1191ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = step[1] + step[2]; 1192ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = -step[2] + step[1]; 1193ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = -step[3] + step[0]; 1194ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = step[4]; 1195ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64); 1196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64); 1197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = step[7]; 1198ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[8] = step[8] + step[11]; 1199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[9] = step[9] + step[10]; 1200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[10] = -step[10] + step[9]; 1201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[11] = -step[11] + step[8]; 1202ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[12] = -step[12] + step[15]; 1203ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[13] = -step[13] + step[14]; 1204ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[14] = step[14] + step[13]; 1205ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[15] = step[15] + step[12]; 1206ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1207ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[16] = step[16]; 1208ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[17] = step[17]; 1209ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64); 1210ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64); 1211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64); 1212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64); 1213ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[22] = step[22]; 1214ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[23] = step[23]; 1215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[24] = step[24]; 1216ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[25] = step[25]; 1217ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64); 1218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64); 1219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64); 1220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64); 1221ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[30] = step[30]; 1222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[31] = step[31]; 1223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 5 1225ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64); 1226ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64); 1227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64); 1228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64); 1229ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[4] = output[4] + output[5]; 1230ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[5] = -output[5] + output[4]; 1231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[6] = -output[6] + output[7]; 1232ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[7] = output[7] + output[6]; 1233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[8] = output[8]; 1234ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64); 1235ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64); 1236ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[11] = output[11]; 1237ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[12] = output[12]; 1238ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64); 1239ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64); 1240ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[15] = output[15]; 1241ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[16] = output[16] + output[19]; 1243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[17] = output[17] + output[18]; 1244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[18] = -output[18] + output[17]; 1245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[19] = -output[19] + output[16]; 1246ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[20] = -output[20] + output[23]; 1247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[21] = -output[21] + output[22]; 1248ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[22] = output[22] + output[21]; 1249ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[23] = output[23] + output[20]; 1250ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[24] = output[24] + output[27]; 1251ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[25] = output[25] + output[26]; 1252ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[26] = -output[26] + output[25]; 1253ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[27] = -output[27] + output[24]; 1254ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[28] = -output[28] + output[31]; 1255ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[29] = -output[29] + output[30]; 1256ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[30] = output[30] + output[29]; 1257ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[31] = output[31] + output[28]; 1258ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1259ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 6 1260ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = step[0]; 1261ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = step[1]; 1262ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = step[2]; 1263ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = step[3]; 1264ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64); 1265ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64); 1266ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64); 1267ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64); 1268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[8] = step[8] + step[9]; 1269ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[9] = -step[9] + step[8]; 1270ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[10] = -step[10] + step[11]; 1271ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[11] = step[11] + step[10]; 1272ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[12] = step[12] + step[13]; 1273ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[13] = -step[13] + step[12]; 1274ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[14] = -step[14] + step[15]; 1275ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[15] = step[15] + step[14]; 1276ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1277ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[16] = step[16]; 1278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64); 1279ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64); 1280ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[19] = step[19]; 1281ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[20] = step[20]; 1282ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64); 1283ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64); 1284ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[23] = step[23]; 1285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[24] = step[24]; 1286ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64); 1287ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64); 1288ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[27] = step[27]; 1289ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[28] = step[28]; 1290ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64); 1291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64); 1292ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[31] = step[31]; 1293ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1294ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Stage 7 1295ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[0] = output[0]; 1296ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[1] = output[1]; 1297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[2] = output[2]; 1298ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[3] = output[3]; 1299ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[4] = output[4]; 1300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[5] = output[5]; 1301ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[6] = output[6]; 1302ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[7] = output[7]; 1303ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64); 1304ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64); 1305ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64); 1306ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64); 1307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64); 1308ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64); 1309ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64); 1310ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64); 1311ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1312ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[16] = output[16] + output[17]; 1313ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[17] = -output[17] + output[16]; 1314ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[18] = -output[18] + output[19]; 1315ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[19] = output[19] + output[18]; 1316ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[20] = output[20] + output[21]; 1317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[21] = -output[21] + output[20]; 1318ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[22] = -output[22] + output[23]; 1319ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[23] = output[23] + output[22]; 1320ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[24] = output[24] + output[25]; 1321ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[25] = -output[25] + output[24]; 1322ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[26] = -output[26] + output[27]; 1323ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[27] = output[27] + output[26]; 1324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[28] = output[28] + output[29]; 1325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[29] = -output[29] + output[28]; 1326ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[30] = -output[30] + output[31]; 1327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[31] = output[31] + output[30]; 1328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1329ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Final stage --- outputs indices are bit-reversed. 1330ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = step[0]; 1331ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[16] = step[1]; 1332ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[8] = step[2]; 1333ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[24] = step[3]; 1334ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = step[4]; 1335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[20] = step[5]; 1336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[12] = step[6]; 1337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[28] = step[7]; 1338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = step[8]; 1339ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[18] = step[9]; 1340ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[10] = step[10]; 1341ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[26] = step[11]; 1342ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = step[12]; 1343ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[22] = step[13]; 1344ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[14] = step[14]; 1345ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[30] = step[15]; 1346ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1347ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64); 1348ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64); 1349ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64); 1350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64); 1351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64); 1352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64); 1353ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64); 1354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64); 1355ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64); 1356ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64); 1357ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64); 1358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64); 1359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64); 1360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64); 1361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); 1362ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); 1363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1364ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1365ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanianvoid vp9_fdct32x32_1_c(const int16_t *input, int16_t *output, int stride) { 1366ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int r, c; 1367ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian int16_t sum = 0; 1368ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (r = 0; r < 32; ++r) 1369ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian for (c = 0; c < 32; ++c) 1370ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian sum += input[r * stride + c]; 1371ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 1372ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian output[0] = sum >> 3; 1373ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian output[1] = 0; 1374ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian} 1375ba6c59e9d7d7013b3906b6f4230b663422681848Vignesh Venkatasubramanian 13765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { 1377ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 1378ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int output[32 * 32]; 1379ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1380ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Columns 1381ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 32; ++i) { 1382ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp_in[32], temp_out[32]; 1383ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 13845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_in[j] = input[j * stride + i] * 4; 13852ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian fdct32(temp_in, temp_out, 0); 1386ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 1387ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; 1388ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 1389ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1390ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Rows 1391ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 32; ++i) { 1392ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp_in[32], temp_out[32]; 1393ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 1394ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = output[j + i * 32]; 13952ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian fdct32(temp_in, temp_out, 0); 1396ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 1397ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; 1398ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 1399ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 14012ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian// Note that although we use dct_32_round in dct32 computation flow, 1402ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang// this 2d fdct32x32 for rate-distortion optimization loop is operating 1403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang// within 16 bits precision. 14045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { 1405ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 1406ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int output[32 * 32]; 1407ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1408ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Columns 1409ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 32; ++i) { 1410ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp_in[32], temp_out[32]; 1411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 14125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_in[j] = input[j * stride + i] * 4; 14132ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian fdct32(temp_in, temp_out, 0); 1414ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 141591037db265ecdd914a26e056cf69207b4f50924ehkuang // TODO(cd): see quality impact of only doing 141691037db265ecdd914a26e056cf69207b4f50924ehkuang // output[j * 32 + i] = (temp_out[j] + 1) >> 2; 141791037db265ecdd914a26e056cf69207b4f50924ehkuang // PS: also change code in vp9/encoder/x86/vp9_dct_sse2.c 1418ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; 1419ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 1420ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1421ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Rows 1422ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 32; ++i) { 1423ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp_in[32], temp_out[32]; 1424ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 1425ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = output[j + i * 32]; 14262ec72e65689c948e92b826ae1e867bf369e72f13Vignesh Venkatasubramanian fdct32(temp_in, temp_out, 1); 1427ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 1428ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out[j + i * 32] = temp_out[j]; 1429ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 1430ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1431