1ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* 2ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * 4ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * Use of this source code is governed by a BSD-style license 5ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * that can be found in the LICENSE file in the root of the source 6ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * tree. An additional intellectual property rights grant can be found 7ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * in the file PATENTS. All contributing project authors may 8ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * be found in the AUTHORS file in the root of the source tree. 9ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */ 10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 11ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include <assert.h> 12ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include <math.h> 13ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "./vpx_config.h" 15ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "./vp9_rtcd.h" 16ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_systemdependent.h" 17ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_blockd.h" 18ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_common.h" 19ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_idct.h" 20ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { 22ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 23ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 0.5 shifts per pixel. */ 24ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i; 25ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t output[16]; 26ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int a1, b1, c1, d1, e1; 275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *ip = input; 28ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *op = output; 29ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 30ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; i++) { 315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang a1 = ip[0] >> UNIT_QUANT_SHIFT; 325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang c1 = ip[1] >> UNIT_QUANT_SHIFT; 335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang d1 = ip[2] >> UNIT_QUANT_SHIFT; 345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang b1 = ip[3] >> UNIT_QUANT_SHIFT; 35ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 += c1; 36ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 -= b1; 37ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang e1 = (a1 - d1) >> 1; 38ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang b1 = e1 - b1; 39ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang c1 = e1 - c1; 40ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 -= b1; 41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 += c1; 42ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[0] = a1; 43ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[1] = b1; 44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[2] = c1; 45ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[3] = d1; 46ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ip += 4; 47ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op += 4; 48ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 49ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 50ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ip = output; 51ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; i++) { 52ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 = ip[4 * 0]; 53ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang c1 = ip[4 * 1]; 54ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 = ip[4 * 2]; 55ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang b1 = ip[4 * 3]; 56ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 += c1; 57ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 -= b1; 58ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang e1 = (a1 - d1) >> 1; 59ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang b1 = e1 - b1; 60ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang c1 = e1 - c1; 61ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 -= b1; 62ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang d1 += c1; 635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[stride * 0] = clip_pixel(dest[stride * 0] + a1); 645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[stride * 1] = clip_pixel(dest[stride * 1] + b1); 655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[stride * 2] = clip_pixel(dest[stride * 2] + c1); 665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[stride * 3] = clip_pixel(dest[stride * 3] + d1); 67ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 68ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ip++; 69ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest++; 70ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 71ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 72ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) { 74ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i; 75ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int a1, e1; 76ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t tmp[4]; 775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *ip = in; 78ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *op = tmp; 79ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang a1 = ip[0] >> UNIT_QUANT_SHIFT; 81ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang e1 = a1 >> 1; 82ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 -= e1; 83ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[0] = a1; 84ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang op[1] = op[2] = op[3] = e1; 85ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 86ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ip = tmp; 87ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; i++) { 88ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang e1 = ip[0] >> 1; 89ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 = ip[0] - e1; 90ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); 91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1); 92ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1); 93ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1); 94ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ip++; 95ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest++; 96ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 97ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 98ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 99b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void idct4(const int16_t *input, int16_t *output) { 100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t step[4]; 101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp1, temp2; 102ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (input[0] + input[2]) * cospi_16_64; 104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (input[0] - input[2]) * cospi_16_64; 105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[0] = dct_const_round_shift(temp1); 106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[1] = dct_const_round_shift(temp2); 107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; 108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; 109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[2] = dct_const_round_shift(temp1); 110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step[3] = dct_const_round_shift(temp2); 111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = step[0] + step[3]; 114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = step[1] + step[2]; 115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = step[1] - step[2]; 116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = step[0] - step[3]; 117ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 118ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) { 120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out[4 * 4]; 121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 122ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[4], temp_out[4]; 124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Rows 126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; ++i) { 127b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct4(input, outptr); 128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 4; 129ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 4; 130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Columns 133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; ++i) { 134ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 4; ++j) 135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j * 4 + i]; 136b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct4(temp_in, temp_out); 137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 4; ++j) 1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) 1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * stride + i]); 140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) { 144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i; 145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int a1; 146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out = dct_const_round_shift(input[0] * cospi_16_64); 147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out = dct_const_round_shift(out * cospi_16_64); 148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang a1 = ROUND_POWER_OF_TWO(out, 4); 149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; i++) { 151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest[0] = clip_pixel(dest[0] + a1); 152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest[1] = clip_pixel(dest[1] + a1); 153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest[2] = clip_pixel(dest[2] + a1); 154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest[3] = clip_pixel(dest[3] + a1); 155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang dest += dest_stride; 156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 159b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void idct8(const int16_t *input, int16_t *output) { 160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t step1[8], step2[8]; 161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp1, temp2; 162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = input[0]; 164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = input[4]; 165ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = input[2]; 166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = input[6]; 167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; 168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; 169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = dct_const_round_shift(temp1); 170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = dct_const_round_shift(temp2); 171ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; 172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; 173ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = dct_const_round_shift(temp1); 174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = dct_const_round_shift(temp2); 175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 & stage 3 - even half 177b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct4(step1, step1); 178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 - odd half 180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[4] = step1[4] + step1[5]; 181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[5] = step1[4] - step1[5]; 182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[6] = -step1[6] + step1[7]; 183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[7] = step1[6] + step1[7]; 184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 3 -odd half 186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = step2[4]; 187ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step2[6] - step2[5]) * cospi_16_64; 188ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step2[5] + step2[6]) * cospi_16_64; 189ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = dct_const_round_shift(temp1); 190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = dct_const_round_shift(temp2); 191ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = step2[7]; 192ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 193ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 4 194ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = step1[0] + step1[7]; 195ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = step1[1] + step1[6]; 196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = step1[2] + step1[5]; 197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = step1[3] + step1[4]; 198ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = step1[3] - step1[4]; 199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = step1[2] - step1[5]; 200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = step1[1] - step1[6]; 201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = step1[0] - step1[7]; 202ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 203ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) { 205ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out[8 * 8]; 206ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 207ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 208ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[8], temp_out[8]; 209ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 210ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // First transform rows 211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 8; ++i) { 212b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct8(input, outptr); 213ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 8; 214ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 8; 215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 216ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 217ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Then transform columns 218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 8; ++i) { 219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 8; ++j) 220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j * 8 + i]; 221b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct8(temp_in, temp_out); 222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 8; ++j) 2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) 2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * stride + i]); 225ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 226ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) { 229f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang int i, j; 230f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang int a1; 231f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang int16_t out = dct_const_round_shift(input[0] * cospi_16_64); 232f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang out = dct_const_round_shift(out * cospi_16_64); 233f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang a1 = ROUND_POWER_OF_TWO(out, 5); 234f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang for (j = 0; j < 8; ++j) { 235f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang for (i = 0; i < 8; ++i) 236f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang dest[i] = clip_pixel(dest[i] + a1); 2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest += stride; 238f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang } 239f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang} 240f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang 241b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void iadst4(const int16_t *input, int16_t *output) { 242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int s0, s1, s2, s3, s4, s5, s6, s7; 243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x0 = input[0]; 245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x1 = input[1]; 246ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x2 = input[2]; 247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x3 = input[3]; 248ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 249ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (!(x0 | x1 | x2 | x3)) { 250ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = output[1] = output[2] = output[3] = 0; 251ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return; 252ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 253ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 254ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = sinpi_1_9 * x0; 255ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = sinpi_2_9 * x0; 256ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = sinpi_3_9 * x1; 257ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = sinpi_4_9 * x2; 258ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = sinpi_1_9 * x2; 259ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = sinpi_2_9 * x3; 260ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = sinpi_4_9 * x3; 261ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = x0 - x2 + x3; 262ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 263ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s3 + s5; 264ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 - s4 - s6; 265ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = sinpi_3_9 * s7; 266ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s2; 267ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0 + x3; 269ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x1 + x3; 270ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2; 271ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x0 + x1 - x3; 272ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 273ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // 1-D transform scaling factor is sqrt(2). 274ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // The overall dynamic range is 14b (input) + 14b (multiplication scaling) 275ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // + 1b (addition) = 29b. 276ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Hence the output bit depth is 15b. 277ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = dct_const_round_shift(s0); 278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = dct_const_round_shift(s1); 279ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = dct_const_round_shift(s2); 280ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = dct_const_round_shift(s3); 281ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 282ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride, 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int tx_type) { 285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const transform_2d IHT_4[] = { 286b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { idct4, idct4 }, // DCT_DCT = 0 287b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { iadst4, idct4 }, // ADST_DCT = 1 288b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { idct4, iadst4 }, // DCT_ADST = 2 289b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { iadst4, iadst4 } // ADST_ADST = 3 290ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang }; 291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 292ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 293ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out[4 * 4]; 294ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 295ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[4], temp_out[4]; 296ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // inverse transform row vectors 298ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; ++i) { 299ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang IHT_4[tx_type].rows(input, outptr); 300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 4; 301ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 4; 302ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 303ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 304ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // inverse transform column vectors 305ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; ++i) { 306ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 4; ++j) 307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j * 4 + i]; 308ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang IHT_4[tx_type].cols(temp_in, temp_out); 309ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 4; ++j) 3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) 3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * stride + i]); 312ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 313ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 314b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void iadst8(const int16_t *input, int16_t *output) { 315ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int s0, s1, s2, s3, s4, s5, s6, s7; 316ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x0 = input[7]; 318ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x1 = input[0]; 319ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x2 = input[5]; 320ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x3 = input[2]; 321ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x4 = input[3]; 322ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x5 = input[4]; 323ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x6 = input[1]; 324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x7 = input[6]; 325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 326ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { 327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = output[1] = output[2] = output[3] = output[4] 328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang = output[5] = output[6] = output[7] = 0; 329ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return; 330ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 331ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 332ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 333ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = cospi_2_64 * x0 + cospi_30_64 * x1; 334ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = cospi_30_64 * x0 - cospi_2_64 * x1; 335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = cospi_10_64 * x2 + cospi_22_64 * x3; 336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = cospi_22_64 * x2 - cospi_10_64 * x3; 337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = cospi_18_64 * x4 + cospi_14_64 * x5; 338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = cospi_14_64 * x4 - cospi_18_64 * x5; 339ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = cospi_26_64 * x6 + cospi_6_64 * x7; 340ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = cospi_6_64 * x6 - cospi_26_64 * x7; 341ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 342ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = dct_const_round_shift(s0 + s4); 343ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = dct_const_round_shift(s1 + s5); 344ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = dct_const_round_shift(s2 + s6); 345ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = dct_const_round_shift(s3 + s7); 346ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x4 = dct_const_round_shift(s0 - s4); 347ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x5 = dct_const_round_shift(s1 - s5); 348ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x6 = dct_const_round_shift(s2 - s6); 349ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x7 = dct_const_round_shift(s3 - s7); 350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0; 353ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x1; 354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2; 355ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x3; 356ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = cospi_8_64 * x4 + cospi_24_64 * x5; 357ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = cospi_24_64 * x4 - cospi_8_64 * x5; 358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; 359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = cospi_8_64 * x6 + cospi_24_64 * x7; 360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s2; 362ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s3; 363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s0 - s2; 364ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s1 - s3; 365ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x4 = dct_const_round_shift(s4 + s6); 366ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x5 = dct_const_round_shift(s5 + s7); 367ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x6 = dct_const_round_shift(s4 - s6); 368ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x7 = dct_const_round_shift(s5 - s7); 369ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 370ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 3 371ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = cospi_16_64 * (x2 + x3); 372ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = cospi_16_64 * (x2 - x3); 373ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = cospi_16_64 * (x6 + x7); 374ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = cospi_16_64 * (x6 - x7); 375ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 376ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = dct_const_round_shift(s2); 377ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = dct_const_round_shift(s3); 378ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x6 = dct_const_round_shift(s6); 379ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x7 = dct_const_round_shift(s7); 380ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 381ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = x0; 382ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = -x4; 383ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = x6; 384ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = -x2; 385ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = x3; 386ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = -x7; 387ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = x5; 388ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = -x1; 389ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 390ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 391ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic const transform_2d IHT_8[] = { 392b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { idct8, idct8 }, // DCT_DCT = 0 393b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { iadst8, idct8 }, // ADST_DCT = 1 394b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { idct8, iadst8 }, // DCT_ADST = 2 395b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { iadst8, iadst8 } // ADST_ADST = 3 396ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}; 397ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride, 3995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int tx_type) { 400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 401ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out[8 * 8]; 402ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[8], temp_out[8]; 404ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const transform_2d ht = IHT_8[tx_type]; 405ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 406ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // inverse transform row vectors 407ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 8; ++i) { 408ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ht.rows(input, outptr); 409ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 8; 410ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 8; 411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 412ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 413ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // inverse transform column vectors 414ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 8; ++i) { 415ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 8; ++j) 416ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j * 8 + i]; 417ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ht.cols(temp_in, temp_out); 418ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 8; ++j) 4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) 4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * stride + i]); 4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 422ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 423ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 4245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) { 42591037db265ecdd914a26e056cf69207b4f50924ehkuang int16_t out[8 * 8] = { 0 }; 426ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 427ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 428ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[8], temp_out[8]; 429ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 430ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // First transform rows 431ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // only first 4 row has non-zero coefs 432ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; ++i) { 433b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct8(input, outptr); 434ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 8; 435ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 8; 436ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 437ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 438ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Then transform columns 439ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 8; ++i) { 440ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 8; ++j) 441ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j * 8 + i]; 442b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct8(temp_in, temp_out); 443ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 8; ++j) 4445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) 4455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * stride + i]); 446ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 447ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 448ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 449b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void idct16(const int16_t *input, int16_t *output) { 450ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t step1[16], step2[16]; 451ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp1, temp2; 452ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 453ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 454ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = input[0/2]; 455ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = input[16/2]; 456ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = input[8/2]; 457ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = input[24/2]; 458ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = input[4/2]; 459ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = input[20/2]; 460ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = input[12/2]; 461ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = input[28/2]; 462ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[8] = input[2/2]; 463ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[9] = input[18/2]; 464ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[10] = input[10/2]; 465ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[11] = input[26/2]; 466ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[12] = input[6/2]; 467ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[13] = input[22/2]; 468ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[14] = input[14/2]; 469ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[15] = input[30/2]; 470ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 471ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 472ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[0] = step1[0]; 473ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[1] = step1[1]; 474ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[2] = step1[2]; 475ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[3] = step1[3]; 476ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[4] = step1[4]; 477ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[5] = step1[5]; 478ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[6] = step1[6]; 479ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[7] = step1[7]; 480ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 481ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; 482ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; 483ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[8] = dct_const_round_shift(temp1); 484ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[15] = dct_const_round_shift(temp2); 485ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 486ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; 487ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; 488ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[9] = dct_const_round_shift(temp1); 489ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[14] = dct_const_round_shift(temp2); 490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 491ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; 492ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; 493ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[10] = dct_const_round_shift(temp1); 494ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[13] = dct_const_round_shift(temp2); 495ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 496ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; 497ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; 498ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[11] = dct_const_round_shift(temp1); 499ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[12] = dct_const_round_shift(temp2); 500ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 501ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 3 502ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = step2[0]; 503ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = step2[1]; 504ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = step2[2]; 505ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = step2[3]; 506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 507ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; 508ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; 509ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = dct_const_round_shift(temp1); 510ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = dct_const_round_shift(temp2); 511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; 512ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; 513ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = dct_const_round_shift(temp1); 514ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = dct_const_round_shift(temp2); 515ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 516ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[8] = step2[8] + step2[9]; 517ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[9] = step2[8] - step2[9]; 518ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[10] = -step2[10] + step2[11]; 519ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[11] = step2[10] + step2[11]; 520ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[12] = step2[12] + step2[13]; 521ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[13] = step2[12] - step2[13]; 522ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[14] = -step2[14] + step2[15]; 523ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[15] = step2[14] + step2[15]; 524ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 52591037db265ecdd914a26e056cf69207b4f50924ehkuang // stage 4 526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step1[0] + step1[1]) * cospi_16_64; 527ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[0] - step1[1]) * cospi_16_64; 528ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[0] = dct_const_round_shift(temp1); 529ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[1] = dct_const_round_shift(temp2); 530ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; 531ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; 532ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[2] = dct_const_round_shift(temp1); 533ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[3] = dct_const_round_shift(temp2); 534ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[4] = step1[4] + step1[5]; 535ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[5] = step1[4] - step1[5]; 536ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[6] = -step1[6] + step1[7]; 537ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[7] = step1[6] + step1[7]; 538ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 539ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[8] = step1[8]; 540ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[15] = step1[15]; 541ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; 542ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; 543ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[9] = dct_const_round_shift(temp1); 544ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[14] = dct_const_round_shift(temp2); 545ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; 546ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; 547ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[10] = dct_const_round_shift(temp1); 548ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[13] = dct_const_round_shift(temp2); 549ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[11] = step1[11]; 550ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[12] = step1[12]; 551ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 552ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 5 553ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = step2[0] + step2[3]; 554ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = step2[1] + step2[2]; 555ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = step2[1] - step2[2]; 556ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = step2[0] - step2[3]; 557ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = step2[4]; 558ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step2[6] - step2[5]) * cospi_16_64; 559ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step2[5] + step2[6]) * cospi_16_64; 560ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = dct_const_round_shift(temp1); 561ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = dct_const_round_shift(temp2); 562ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = step2[7]; 563ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 564ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[8] = step2[8] + step2[11]; 565ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[9] = step2[9] + step2[10]; 566ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[10] = step2[9] - step2[10]; 567ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[11] = step2[8] - step2[11]; 568ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[12] = -step2[12] + step2[15]; 569ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[13] = -step2[13] + step2[14]; 570ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[14] = step2[13] + step2[14]; 571ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[15] = step2[12] + step2[15]; 572ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 573ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 6 574ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[0] = step1[0] + step1[7]; 575ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[1] = step1[1] + step1[6]; 576ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[2] = step1[2] + step1[5]; 577ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[3] = step1[3] + step1[4]; 578ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[4] = step1[3] - step1[4]; 579ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[5] = step1[2] - step1[5]; 580ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[6] = step1[1] - step1[6]; 581ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[7] = step1[0] - step1[7]; 582ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[8] = step1[8]; 583ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[9] = step1[9]; 584ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (-step1[10] + step1[13]) * cospi_16_64; 585ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[10] + step1[13]) * cospi_16_64; 586ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[10] = dct_const_round_shift(temp1); 587ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[13] = dct_const_round_shift(temp2); 588ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (-step1[11] + step1[12]) * cospi_16_64; 589ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[11] + step1[12]) * cospi_16_64; 590ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[11] = dct_const_round_shift(temp1); 591ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[12] = dct_const_round_shift(temp2); 592ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[14] = step1[14]; 593ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[15] = step1[15]; 594ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 595ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 7 596ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = step2[0] + step2[15]; 597ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = step2[1] + step2[14]; 598ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = step2[2] + step2[13]; 599ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = step2[3] + step2[12]; 600ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = step2[4] + step2[11]; 601ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = step2[5] + step2[10]; 602ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = step2[6] + step2[9]; 603ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = step2[7] + step2[8]; 604ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[8] = step2[7] - step2[8]; 605ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[9] = step2[6] - step2[9]; 606ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[10] = step2[5] - step2[10]; 607ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[11] = step2[4] - step2[11]; 608ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[12] = step2[3] - step2[12]; 609ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[13] = step2[2] - step2[13]; 610ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[14] = step2[1] - step2[14]; 611ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[15] = step2[0] - step2[15]; 612ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 613ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 6145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) { 615ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out[16 * 16]; 616ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 617ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 618ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[16], temp_out[16]; 619ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 620ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // First transform rows 621ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 16; ++i) { 622b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct16(input, outptr); 623ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 16; 624ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 16; 625ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 626ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 627ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Then transform columns 628ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 16; ++i) { 629ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 16; ++j) 630ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j * 16 + i]; 631b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct16(temp_in, temp_out); 632ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 16; ++j) 6335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 6345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * stride + i]); 635ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 636ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 637ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 638b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void iadst16(const int16_t *input, int16_t *output) { 639ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; 640ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 641ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x0 = input[15]; 642ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x1 = input[0]; 643ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x2 = input[13]; 644ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x3 = input[2]; 645ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x4 = input[11]; 646ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x5 = input[4]; 647ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x6 = input[9]; 648ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x7 = input[6]; 649ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x8 = input[7]; 650ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x9 = input[8]; 651ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x10 = input[5]; 652ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x11 = input[10]; 653ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x12 = input[3]; 654ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x13 = input[12]; 655ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x14 = input[1]; 656ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int x15 = input[14]; 657ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 658ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 659ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { 660ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = output[1] = output[2] = output[3] = output[4] 661ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang = output[5] = output[6] = output[7] = output[8] 662ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang = output[9] = output[10] = output[11] = output[12] 663ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang = output[13] = output[14] = output[15] = 0; 664ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang return; 665ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 666ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 667ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 668ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0 * cospi_1_64 + x1 * cospi_31_64; 669ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x0 * cospi_31_64 - x1 * cospi_1_64; 670ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2 * cospi_5_64 + x3 * cospi_27_64; 671ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x2 * cospi_27_64 - x3 * cospi_5_64; 672ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = x4 * cospi_9_64 + x5 * cospi_23_64; 673ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = x4 * cospi_23_64 - x5 * cospi_9_64; 674ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = x6 * cospi_13_64 + x7 * cospi_19_64; 675ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = x6 * cospi_19_64 - x7 * cospi_13_64; 676ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s8 = x8 * cospi_17_64 + x9 * cospi_15_64; 677ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s9 = x8 * cospi_15_64 - x9 * cospi_17_64; 678ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s10 = x10 * cospi_21_64 + x11 * cospi_11_64; 679ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s11 = x10 * cospi_11_64 - x11 * cospi_21_64; 680ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s12 = x12 * cospi_25_64 + x13 * cospi_7_64; 681ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s13 = x12 * cospi_7_64 - x13 * cospi_25_64; 682ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s14 = x14 * cospi_29_64 + x15 * cospi_3_64; 683ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s15 = x14 * cospi_3_64 - x15 * cospi_29_64; 684ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 685ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = dct_const_round_shift(s0 + s8); 686ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = dct_const_round_shift(s1 + s9); 687ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = dct_const_round_shift(s2 + s10); 688ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = dct_const_round_shift(s3 + s11); 689ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x4 = dct_const_round_shift(s4 + s12); 690ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x5 = dct_const_round_shift(s5 + s13); 691ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x6 = dct_const_round_shift(s6 + s14); 692ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x7 = dct_const_round_shift(s7 + s15); 693ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x8 = dct_const_round_shift(s0 - s8); 694ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x9 = dct_const_round_shift(s1 - s9); 695ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x10 = dct_const_round_shift(s2 - s10); 696ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x11 = dct_const_round_shift(s3 - s11); 697ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x12 = dct_const_round_shift(s4 - s12); 698ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x13 = dct_const_round_shift(s5 - s13); 699ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x14 = dct_const_round_shift(s6 - s14); 700ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x15 = dct_const_round_shift(s7 - s15); 701ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 702ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 703ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0; 704ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x1; 705ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2; 706ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x3; 707ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = x4; 708ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = x5; 709ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = x6; 710ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = x7; 711ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s8 = x8 * cospi_4_64 + x9 * cospi_28_64; 712ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s9 = x8 * cospi_28_64 - x9 * cospi_4_64; 713ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s10 = x10 * cospi_20_64 + x11 * cospi_12_64; 714ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s11 = x10 * cospi_12_64 - x11 * cospi_20_64; 715ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; 716ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s13 = x12 * cospi_4_64 + x13 * cospi_28_64; 717ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; 718ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s15 = x14 * cospi_20_64 + x15 * cospi_12_64; 719ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 720ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s4; 721ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s5; 722ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s2 + s6; 723ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s3 + s7; 724ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x4 = s0 - s4; 725ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x5 = s1 - s5; 726ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x6 = s2 - s6; 727ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x7 = s3 - s7; 728ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x8 = dct_const_round_shift(s8 + s12); 729ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x9 = dct_const_round_shift(s9 + s13); 730ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x10 = dct_const_round_shift(s10 + s14); 731ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x11 = dct_const_round_shift(s11 + s15); 732ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x12 = dct_const_round_shift(s8 - s12); 733ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x13 = dct_const_round_shift(s9 - s13); 734ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x14 = dct_const_round_shift(s10 - s14); 735ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x15 = dct_const_round_shift(s11 - s15); 736ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 737ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 3 738ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s0 = x0; 739ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s1 = x1; 740ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = x2; 741ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = x3; 742ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s4 = x4 * cospi_8_64 + x5 * cospi_24_64; 743ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s5 = x4 * cospi_24_64 - x5 * cospi_8_64; 744ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; 745ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = x6 * cospi_8_64 + x7 * cospi_24_64; 746ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s8 = x8; 747ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s9 = x9; 748ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s10 = x10; 749ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s11 = x11; 750ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s12 = x12 * cospi_8_64 + x13 * cospi_24_64; 751ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s13 = x12 * cospi_24_64 - x13 * cospi_8_64; 752ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; 753ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s15 = x14 * cospi_8_64 + x15 * cospi_24_64; 754ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 755ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x0 = s0 + s2; 756ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x1 = s1 + s3; 757ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = s0 - s2; 758ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = s1 - s3; 759ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x4 = dct_const_round_shift(s4 + s6); 760ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x5 = dct_const_round_shift(s5 + s7); 761ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x6 = dct_const_round_shift(s4 - s6); 762ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x7 = dct_const_round_shift(s5 - s7); 763ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x8 = s8 + s10; 764ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x9 = s9 + s11; 765ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x10 = s8 - s10; 766ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x11 = s9 - s11; 767ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x12 = dct_const_round_shift(s12 + s14); 768ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x13 = dct_const_round_shift(s13 + s15); 769ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x14 = dct_const_round_shift(s12 - s14); 770ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x15 = dct_const_round_shift(s13 - s15); 771ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 772ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 4 773ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s2 = (- cospi_16_64) * (x2 + x3); 774ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s3 = cospi_16_64 * (x2 - x3); 775ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s6 = cospi_16_64 * (x6 + x7); 776ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s7 = cospi_16_64 * (- x6 + x7); 777ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s10 = cospi_16_64 * (x10 + x11); 778ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s11 = cospi_16_64 * (- x10 + x11); 779ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s14 = (- cospi_16_64) * (x14 + x15); 780ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang s15 = cospi_16_64 * (x14 - x15); 781ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 782ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x2 = dct_const_round_shift(s2); 783ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x3 = dct_const_round_shift(s3); 784ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x6 = dct_const_round_shift(s6); 785ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x7 = dct_const_round_shift(s7); 786ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x10 = dct_const_round_shift(s10); 787ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x11 = dct_const_round_shift(s11); 788ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x14 = dct_const_round_shift(s14); 789ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang x15 = dct_const_round_shift(s15); 790ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 791ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = x0; 792ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = -x8; 793ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = x12; 794ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = -x4; 795ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = x6; 796ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = x14; 797ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = x10; 798ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = x2; 799ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[8] = x3; 800ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[9] = x11; 801ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[10] = x15; 802ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[11] = x7; 803ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[12] = x5; 804ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[13] = -x13; 805ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[14] = x9; 806ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[15] = -x1; 807ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 808ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 809ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic const transform_2d IHT_16[] = { 810b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { idct16, idct16 }, // DCT_DCT = 0 811b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { iadst16, idct16 }, // ADST_DCT = 1 812b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { idct16, iadst16 }, // DCT_ADST = 2 813b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian { iadst16, iadst16 } // ADST_ADST = 3 814ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}; 815ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 8165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride, 8175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int tx_type) { 818ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 819ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out[16 * 16]; 820ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 821ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[16], temp_out[16]; 822ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang const transform_2d ht = IHT_16[tx_type]; 823ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 824ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Rows 825ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 16; ++i) { 826ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ht.rows(input, outptr); 827ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 16; 828ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 16; 829ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 830ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 831ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Columns 832ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 16; ++i) { 833ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 16; ++j) 834ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j * 16 + i]; 835ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang ht.cols(temp_in, temp_out); 836ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 16; ++j) 8375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 838b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian + dest[j * stride + i]); 839b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian } 840ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 841ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 8425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) { 84391037db265ecdd914a26e056cf69207b4f50924ehkuang int16_t out[16 * 16] = { 0 }; 844ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 845ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 846ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[16], temp_out[16]; 847ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 84891037db265ecdd914a26e056cf69207b4f50924ehkuang // First transform rows. Since all non-zero dct coefficients are in 84991037db265ecdd914a26e056cf69207b4f50924ehkuang // upper-left 4x4 area, we only need to calculate first 4 rows here. 850ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 4; ++i) { 851b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct16(input, outptr); 852ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 16; 853ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 16; 854ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 855ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 856ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Then transform columns 857ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 16; ++i) { 858ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 16; ++j) 859ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j*16 + i]; 860b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct16(temp_in, temp_out); 861ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 16; ++j) 8625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 8635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * stride + i]); 864ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 865ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 866ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 8675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) { 868f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang int i, j; 869f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang int a1; 870ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out = dct_const_round_shift(input[0] * cospi_16_64); 871ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out = dct_const_round_shift(out * cospi_16_64); 872f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang a1 = ROUND_POWER_OF_TWO(out, 6); 873f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang for (j = 0; j < 16; ++j) { 874f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang for (i = 0; i < 16; ++i) 875f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang dest[i] = clip_pixel(dest[i] + a1); 8765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest += stride; 877f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang } 878ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 879ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 880b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void idct32(const int16_t *input, int16_t *output) { 881ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t step1[32], step2[32]; 882ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int temp1, temp2; 883ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 884ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 1 885ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = input[0]; 886ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = input[16]; 887ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = input[8]; 888ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = input[24]; 889ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = input[4]; 890ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = input[20]; 891ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = input[12]; 892ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = input[28]; 893ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[8] = input[2]; 894ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[9] = input[18]; 895ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[10] = input[10]; 896ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[11] = input[26]; 897ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[12] = input[6]; 898ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[13] = input[22]; 899ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[14] = input[14]; 900ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[15] = input[30]; 901ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 902ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; 903ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; 904ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[16] = dct_const_round_shift(temp1); 905ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[31] = dct_const_round_shift(temp2); 906ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 907ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; 908ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; 909ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[17] = dct_const_round_shift(temp1); 910ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[30] = dct_const_round_shift(temp2); 911ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 912ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; 913ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; 914ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[18] = dct_const_round_shift(temp1); 915ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[29] = dct_const_round_shift(temp2); 916ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 917ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; 918ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; 919ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[19] = dct_const_round_shift(temp1); 920ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[28] = dct_const_round_shift(temp2); 921ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 922ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; 923ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; 924ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[20] = dct_const_round_shift(temp1); 925ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[27] = dct_const_round_shift(temp2); 926ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 927ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; 928ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; 929ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[21] = dct_const_round_shift(temp1); 930ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[26] = dct_const_round_shift(temp2); 931ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 932ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; 933ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; 934ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[22] = dct_const_round_shift(temp1); 935ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[25] = dct_const_round_shift(temp2); 936ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 937ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; 938ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; 939ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[23] = dct_const_round_shift(temp1); 940ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[24] = dct_const_round_shift(temp2); 941ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 942ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 2 943ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[0] = step1[0]; 944ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[1] = step1[1]; 945ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[2] = step1[2]; 946ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[3] = step1[3]; 947ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[4] = step1[4]; 948ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[5] = step1[5]; 949ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[6] = step1[6]; 950ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[7] = step1[7]; 951ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 952ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; 953ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; 954ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[8] = dct_const_round_shift(temp1); 955ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[15] = dct_const_round_shift(temp2); 956ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 957ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; 958ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; 959ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[9] = dct_const_round_shift(temp1); 960ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[14] = dct_const_round_shift(temp2); 961ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 962ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; 963ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; 964ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[10] = dct_const_round_shift(temp1); 965ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[13] = dct_const_round_shift(temp2); 966ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 967ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; 968ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; 969ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[11] = dct_const_round_shift(temp1); 970ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[12] = dct_const_round_shift(temp2); 971ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 972ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[16] = step1[16] + step1[17]; 973ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[17] = step1[16] - step1[17]; 974ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[18] = -step1[18] + step1[19]; 975ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[19] = step1[18] + step1[19]; 976ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[20] = step1[20] + step1[21]; 977ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[21] = step1[20] - step1[21]; 978ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[22] = -step1[22] + step1[23]; 979ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[23] = step1[22] + step1[23]; 980ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[24] = step1[24] + step1[25]; 981ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[25] = step1[24] - step1[25]; 982ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[26] = -step1[26] + step1[27]; 983ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[27] = step1[26] + step1[27]; 984ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[28] = step1[28] + step1[29]; 985ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[29] = step1[28] - step1[29]; 986ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[30] = -step1[30] + step1[31]; 987ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[31] = step1[30] + step1[31]; 988ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 989ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 3 990ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = step2[0]; 991ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = step2[1]; 992ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = step2[2]; 993ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = step2[3]; 994ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 995ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; 996ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; 997ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = dct_const_round_shift(temp1); 998ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = dct_const_round_shift(temp2); 999ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; 1000ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; 1001ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = dct_const_round_shift(temp1); 1002ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = dct_const_round_shift(temp2); 1003ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1004ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[8] = step2[8] + step2[9]; 1005ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[9] = step2[8] - step2[9]; 1006ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[10] = -step2[10] + step2[11]; 1007ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[11] = step2[10] + step2[11]; 1008ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[12] = step2[12] + step2[13]; 1009ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[13] = step2[12] - step2[13]; 1010ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[14] = -step2[14] + step2[15]; 1011ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[15] = step2[14] + step2[15]; 1012ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1013ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[16] = step2[16]; 1014ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[31] = step2[31]; 1015ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; 1016ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; 1017ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[17] = dct_const_round_shift(temp1); 1018ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[30] = dct_const_round_shift(temp2); 1019ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; 1020ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; 1021ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[18] = dct_const_round_shift(temp1); 1022ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[29] = dct_const_round_shift(temp2); 1023ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[19] = step2[19]; 1024ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[20] = step2[20]; 1025ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; 1026ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; 1027ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[21] = dct_const_round_shift(temp1); 1028ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[26] = dct_const_round_shift(temp2); 1029ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; 1030ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; 1031ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[22] = dct_const_round_shift(temp1); 1032ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[25] = dct_const_round_shift(temp2); 1033ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[23] = step2[23]; 1034ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[24] = step2[24]; 1035ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[27] = step2[27]; 1036ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[28] = step2[28]; 1037ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1038ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 4 1039ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step1[0] + step1[1]) * cospi_16_64; 1040ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[0] - step1[1]) * cospi_16_64; 1041ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[0] = dct_const_round_shift(temp1); 1042ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[1] = dct_const_round_shift(temp2); 1043ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; 1044ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; 1045ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[2] = dct_const_round_shift(temp1); 1046ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[3] = dct_const_round_shift(temp2); 1047ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[4] = step1[4] + step1[5]; 1048ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[5] = step1[4] - step1[5]; 1049ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[6] = -step1[6] + step1[7]; 1050ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[7] = step1[6] + step1[7]; 1051ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1052ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[8] = step1[8]; 1053ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[15] = step1[15]; 1054ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; 1055ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; 1056ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[9] = dct_const_round_shift(temp1); 1057ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[14] = dct_const_round_shift(temp2); 1058ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; 1059ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; 1060ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[10] = dct_const_round_shift(temp1); 1061ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[13] = dct_const_round_shift(temp2); 1062ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[11] = step1[11]; 1063ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[12] = step1[12]; 1064ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1065ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[16] = step1[16] + step1[19]; 1066ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[17] = step1[17] + step1[18]; 1067ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[18] = step1[17] - step1[18]; 1068ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[19] = step1[16] - step1[19]; 1069ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[20] = -step1[20] + step1[23]; 1070ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[21] = -step1[21] + step1[22]; 1071ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[22] = step1[21] + step1[22]; 1072ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[23] = step1[20] + step1[23]; 1073ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1074ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[24] = step1[24] + step1[27]; 1075ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[25] = step1[25] + step1[26]; 1076ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[26] = step1[25] - step1[26]; 1077ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[27] = step1[24] - step1[27]; 1078ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[28] = -step1[28] + step1[31]; 1079ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[29] = -step1[29] + step1[30]; 1080ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[30] = step1[29] + step1[30]; 1081ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[31] = step1[28] + step1[31]; 1082ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1083ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 5 1084ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = step2[0] + step2[3]; 1085ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = step2[1] + step2[2]; 1086ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = step2[1] - step2[2]; 1087ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = step2[0] - step2[3]; 1088ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = step2[4]; 1089ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (step2[6] - step2[5]) * cospi_16_64; 1090ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step2[5] + step2[6]) * cospi_16_64; 1091ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = dct_const_round_shift(temp1); 1092ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = dct_const_round_shift(temp2); 1093ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = step2[7]; 1094ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1095ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[8] = step2[8] + step2[11]; 1096ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[9] = step2[9] + step2[10]; 1097ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[10] = step2[9] - step2[10]; 1098ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[11] = step2[8] - step2[11]; 1099ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[12] = -step2[12] + step2[15]; 1100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[13] = -step2[13] + step2[14]; 1101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[14] = step2[13] + step2[14]; 1102ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[15] = step2[12] + step2[15]; 1103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[16] = step2[16]; 1105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[17] = step2[17]; 1106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; 1107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; 1108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[18] = dct_const_round_shift(temp1); 1109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[29] = dct_const_round_shift(temp2); 1110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; 1111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; 1112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[19] = dct_const_round_shift(temp1); 1113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[28] = dct_const_round_shift(temp2); 1114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; 1115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; 1116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[20] = dct_const_round_shift(temp1); 1117ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[27] = dct_const_round_shift(temp2); 1118ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; 1119ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; 1120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[21] = dct_const_round_shift(temp1); 1121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[26] = dct_const_round_shift(temp2); 1122ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[22] = step2[22]; 1123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[23] = step2[23]; 1124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[24] = step2[24]; 1125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[25] = step2[25]; 1126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[30] = step2[30]; 1127ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[31] = step2[31]; 1128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1129ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 6 1130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[0] = step1[0] + step1[7]; 1131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[1] = step1[1] + step1[6]; 1132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[2] = step1[2] + step1[5]; 1133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[3] = step1[3] + step1[4]; 1134ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[4] = step1[3] - step1[4]; 1135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[5] = step1[2] - step1[5]; 1136ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[6] = step1[1] - step1[6]; 1137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[7] = step1[0] - step1[7]; 1138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[8] = step1[8]; 1139ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[9] = step1[9]; 1140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (-step1[10] + step1[13]) * cospi_16_64; 1141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[10] + step1[13]) * cospi_16_64; 1142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[10] = dct_const_round_shift(temp1); 1143ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[13] = dct_const_round_shift(temp2); 1144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (-step1[11] + step1[12]) * cospi_16_64; 1145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step1[11] + step1[12]) * cospi_16_64; 1146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[11] = dct_const_round_shift(temp1); 1147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[12] = dct_const_round_shift(temp2); 1148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[14] = step1[14]; 1149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[15] = step1[15]; 1150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[16] = step1[16] + step1[23]; 1152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[17] = step1[17] + step1[22]; 1153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[18] = step1[18] + step1[21]; 1154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[19] = step1[19] + step1[20]; 1155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[20] = step1[19] - step1[20]; 1156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[21] = step1[18] - step1[21]; 1157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[22] = step1[17] - step1[22]; 1158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[23] = step1[16] - step1[23]; 1159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[24] = -step1[24] + step1[31]; 1161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[25] = -step1[25] + step1[30]; 1162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[26] = -step1[26] + step1[29]; 1163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[27] = -step1[27] + step1[28]; 1164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[28] = step1[27] + step1[28]; 1165ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[29] = step1[26] + step1[29]; 1166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[30] = step1[25] + step1[30]; 1167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step2[31] = step1[24] + step1[31]; 1168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // stage 7 1170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[0] = step2[0] + step2[15]; 1171ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[1] = step2[1] + step2[14]; 1172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[2] = step2[2] + step2[13]; 1173ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[3] = step2[3] + step2[12]; 1174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[4] = step2[4] + step2[11]; 1175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[5] = step2[5] + step2[10]; 1176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[6] = step2[6] + step2[9]; 1177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[7] = step2[7] + step2[8]; 1178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[8] = step2[7] - step2[8]; 1179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[9] = step2[6] - step2[9]; 1180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[10] = step2[5] - step2[10]; 1181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[11] = step2[4] - step2[11]; 1182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[12] = step2[3] - step2[12]; 1183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[13] = step2[2] - step2[13]; 1184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[14] = step2[1] - step2[14]; 1185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[15] = step2[0] - step2[15]; 1186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1187ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[16] = step2[16]; 1188ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[17] = step2[17]; 1189ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[18] = step2[18]; 1190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[19] = step2[19]; 1191ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (-step2[20] + step2[27]) * cospi_16_64; 1192ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step2[20] + step2[27]) * cospi_16_64; 1193ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[20] = dct_const_round_shift(temp1); 1194ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[27] = dct_const_round_shift(temp2); 1195ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (-step2[21] + step2[26]) * cospi_16_64; 1196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step2[21] + step2[26]) * cospi_16_64; 1197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[21] = dct_const_round_shift(temp1); 1198ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[26] = dct_const_round_shift(temp2); 1199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (-step2[22] + step2[25]) * cospi_16_64; 1200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step2[22] + step2[25]) * cospi_16_64; 1201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[22] = dct_const_round_shift(temp1); 1202ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[25] = dct_const_round_shift(temp2); 1203ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp1 = (-step2[23] + step2[24]) * cospi_16_64; 1204ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp2 = (step2[23] + step2[24]) * cospi_16_64; 1205ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[23] = dct_const_round_shift(temp1); 1206ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[24] = dct_const_round_shift(temp2); 1207ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[28] = step2[28]; 1208ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[29] = step2[29]; 1209ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[30] = step2[30]; 1210ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang step1[31] = step2[31]; 1211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // final stage 1213ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[0] = step1[0] + step1[31]; 1214ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[1] = step1[1] + step1[30]; 1215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[2] = step1[2] + step1[29]; 1216ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[3] = step1[3] + step1[28]; 1217ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[4] = step1[4] + step1[27]; 1218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[5] = step1[5] + step1[26]; 1219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[6] = step1[6] + step1[25]; 1220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[7] = step1[7] + step1[24]; 1221ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[8] = step1[8] + step1[23]; 1222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[9] = step1[9] + step1[22]; 1223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[10] = step1[10] + step1[21]; 1224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[11] = step1[11] + step1[20]; 1225ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[12] = step1[12] + step1[19]; 1226ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[13] = step1[13] + step1[18]; 1227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[14] = step1[14] + step1[17]; 1228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[15] = step1[15] + step1[16]; 1229ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[16] = step1[15] - step1[16]; 1230ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[17] = step1[14] - step1[17]; 1231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[18] = step1[13] - step1[18]; 1232ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[19] = step1[12] - step1[19]; 1233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[20] = step1[11] - step1[20]; 1234ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[21] = step1[10] - step1[21]; 1235ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[22] = step1[9] - step1[22]; 1236ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[23] = step1[8] - step1[23]; 1237ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[24] = step1[7] - step1[24]; 1238ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[25] = step1[6] - step1[25]; 1239ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[26] = step1[5] - step1[26]; 1240ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[27] = step1[4] - step1[27]; 1241ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[28] = step1[3] - step1[28]; 1242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[29] = step1[2] - step1[29]; 1243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[30] = step1[1] - step1[30]; 1244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang output[31] = step1[0] - step1[31]; 1245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1246ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 12475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) { 1248ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out[32 * 32]; 1249ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t *outptr = out; 1250ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int i, j; 1251ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t temp_in[32], temp_out[32]; 1252ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1253ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Rows 1254ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 32; ++i) { 12555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t zero_coeff[16]; 12565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 16; ++j) 12575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang zero_coeff[j] = input[2 * j] | input[2 * j + 1]; 12585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 8; ++j) 12595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; 12605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 4; ++j) 12615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; 12625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 2; ++j) 12635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; 12645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (zero_coeff[0] | zero_coeff[1]) 1266b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct32(input, outptr); 12675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang else 12685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vpx_memset(outptr, 0, sizeof(int16_t) * 32); 12695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input += 32; 12705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang outptr += 32; 12715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // Columns 12745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 32; ++i) { 12755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 32; ++j) 12765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_in[j] = out[j * 32 + i]; 1277b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct32(temp_in, temp_out); 12785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 32; ++j) 12795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 1280b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian + dest[j * stride + i]); 12815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 12835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) { 12855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t out[32 * 32] = {0}; 12865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t *outptr = out; 12875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int i, j; 12885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t temp_in[32], temp_out[32]; 12895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // Rows 12915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // only upper-left 8x8 has non-zero coeff 12925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 8; ++i) { 1293b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct32(input, outptr); 1294ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang input += 32; 1295ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang outptr += 32; 1296ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 1297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 1298ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang // Columns 1299ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (i = 0; i < 32; ++i) { 1300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 1301ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang temp_in[j] = out[j * 32 + i]; 1302b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian idct32(temp_in, temp_out); 1303ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang for (j = 0; j < 32; ++j) 13045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) 13055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * stride + i]); 1306ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang } 1307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1308ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang 13095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) { 13105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int i, j; 13115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int a1; 13125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1313ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang int16_t out = dct_const_round_shift(input[0] * cospi_16_64); 1314ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang out = dct_const_round_shift(out * cospi_16_64); 13155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang a1 = ROUND_POWER_OF_TWO(out, 6); 13165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 32; ++j) { 13185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 32; ++i) 13195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[i] = clip_pixel(dest[i] + a1); 13205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest += stride; 13215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 13225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 13235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang// idct 13255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) { 13265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (eob > 1) 13275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_idct4x4_16_add(input, dest, stride); 13285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang else 13295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_idct4x4_1_add(input, dest, stride); 13305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 13315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) { 13345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (eob > 1) 13355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_iwht4x4_16_add(input, dest, stride); 13365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang else 13375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_iwht4x4_1_add(input, dest, stride); 13385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 13395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) { 13415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // If dc is 1, then input[0] is the reconstructed value, do not need 13425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. 13435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // The calculation can be simplified if there are not many non-zero dct 13455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // coefficients. Use eobs to decide what to do. 13465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. 13475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // Combine that with code here. 1348b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian if (eob == 1) 1349b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian // DC only DCT coefficient 1350b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct8x8_1_add(input, dest, stride); 1351b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian else if (eob <= 10) 1352b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct8x8_10_add(input, dest, stride); 1353b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian else 1354b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct8x8_64_add(input, dest, stride); 13555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 13565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, 13585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int eob) { 13595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* The calculation can be simplified if there are not many non-zero dct 13605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * coefficients. Use eobs to separate different cases. */ 1361b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian if (eob == 1) 1362b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian /* DC only DCT coefficient. */ 1363b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct16x16_1_add(input, dest, stride); 1364b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian else if (eob <= 10) 1365b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct16x16_10_add(input, dest, stride); 1366b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian else 1367b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct16x16_256_add(input, dest, stride); 13685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 13695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride, 13715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int eob) { 1372b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian if (eob == 1) 1373b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct32x32_1_add(input, dest, stride); 1374b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian else if (eob <= 34) 1375b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian // non-zero coeff only in upper-left 8x8 1376b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct32x32_34_add(input, dest, stride); 1377b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian else 1378b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct32x32_1024_add(input, dest, stride); 13795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 13805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang// iht 13825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, 13835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int stride, int eob) { 13845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (tx_type == DCT_DCT) 13855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_idct4x4_add(input, dest, stride, eob); 13865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang else 13875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_iht4x4_16_add(input, dest, stride, tx_type); 13885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 13895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, 13915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int stride, int eob) { 13925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (tx_type == DCT_DCT) { 13935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_idct8x8_add(input, dest, stride, eob); 13945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 1395b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_iht8x8_64_add(input, dest, stride, tx_type); 13965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 13975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 13985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 13995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, 14005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int stride, int eob) { 14015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (tx_type == DCT_DCT) { 14025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_idct16x16_add(input, dest, stride, eob); 14035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 1404b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_iht16x16_256_add(input, dest, stride, tx_type); 14055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1406ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang} 1407