15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/* 25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * 45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Use of this source code is governed by a BSD-style license 55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * that can be found in the LICENSE file in the root of the source 65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * tree. An additional intellectual property rights grant can be found 75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * in the file PATENTS. All contributing project authors may 85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * be found in the AUTHORS file in the root of the source tree. 95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h> 125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h> 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vpx_config.h" 155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vp9_rtcd.h" 165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_common.h" 175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_blockd.h" 185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_idct.h" 195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" 205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2 22b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void vp9_idct4_rows_dspr2(const int16_t *input, int16_t *output) { 235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t step_0, step_1, step_2, step_3; 245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int Temp0, Temp1, Temp2, Temp3; 255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int const_2_power_13 = 8192; 265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int i; 275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 4; i--; ) { 295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_1 = (input[0] + input[2]) * cospi_16_64; 325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step_0 = dct_const_round_shift(temp_1); 335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_2 = (input[0] - input[2]) * cospi_16_64; 355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step_1 = dct_const_round_shift(temp_2); 365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lh %[Temp0], 0(%[input]) \n\t" 385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lh %[Temp1], 4(%[input]) \n\t" 395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[const_2_power_13], $ac0 \n\t" 405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac0 \n\t" 415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[const_2_power_13], $ac1 \n\t" 425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp2], %[Temp0], %[Temp1] \n\t" 445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sub %[Temp3], %[Temp0], %[Temp1] \n\t" 455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac0, %[Temp2], %[cospi_16_64] \n\t" 465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lh %[Temp0], 2(%[input]) \n\t" 475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lh %[Temp1], 6(%[input]) \n\t" 485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[step_0], $ac0, 31 \n\t" 495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[const_2_power_13], $ac0 \n\t" 505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac0 \n\t" 515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac1, %[Temp3], %[cospi_16_64] \n\t" 535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[step_1], $ac1, 31 \n\t" 545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[const_2_power_13], $ac1 \n\t" 555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; 595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step_2 = dct_const_round_shift(temp1); 605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac0, %[Temp0], %[cospi_24_64] \n\t" 625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "msub $ac0, %[Temp1], %[cospi_8_64] \n\t" 635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[step_2], $ac0, 31 \n\t" 645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; 675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step_3 = dct_const_round_shift(temp2); 685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac1, %[Temp0], %[cospi_8_64] \n\t" 705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac1, %[Temp1], %[cospi_24_64] \n\t" 715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[step_3], $ac1, 31 \n\t" 725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[0] = step_0 + step_3; 755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[4] = step_1 + step_2; 765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[8] = step_1 - step_2; 775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[12] = step_0 - step_3; 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp0], %[step_0], %[step_3] \n\t" 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sh %[Temp0], 0(%[output]) \n\t" 815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp1], %[step_1], %[step_2] \n\t" 835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sh %[Temp1], 8(%[output]) \n\t" 845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sub %[Temp2], %[step_1], %[step_2] \n\t" 865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sh %[Temp2], 16(%[output]) \n\t" 875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sub %[Temp3], %[step_0], %[step_3] \n\t" 895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sh %[Temp3], 24(%[output]) \n\t" 905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [Temp0] "=&r" (Temp0), [Temp1] "=&r" (Temp1), 925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [step_0] "=&r" (step_0), [step_1] "=&r" (step_1), 945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [step_2] "=&r" (step_2), [step_3] "=&r" (step_3), 955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [output] "+r" (output) 965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [const_2_power_13] "r" (const_2_power_13), 975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), 985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cospi_24_64] "r" (cospi_24_64), 995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [input] "r" (input) 1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input += 4; 1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output += 1; 1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 107b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void vp9_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, 1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int dest_stride) { 1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t step_0, step_1, step_2, step_3; 1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int Temp0, Temp1, Temp2, Temp3; 1115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int const_2_power_13 = 8192; 1125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int i; 1135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dest_pix; 1145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 1155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch vp9_ff_cropTbl */ 1175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(vp9_ff_cropTbl); 1185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(vp9_ff_cropTbl + 32); 1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(vp9_ff_cropTbl + 64); 1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(vp9_ff_cropTbl + 96); 1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(vp9_ff_cropTbl + 128); 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(vp9_ff_cropTbl + 160); 1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(vp9_ff_cropTbl + 192); 1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(vp9_ff_cropTbl + 224); 1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 4; ++i) { 1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest_pix = (dest + i); 1285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 1315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_1 = (input[0] + input[2]) * cospi_16_64; 1325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step_0 = dct_const_round_shift(temp_1); 1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_2 = (input[0] - input[2]) * cospi_16_64; 1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step_1 = dct_const_round_shift(temp_2); 1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lh %[Temp0], 0(%[input]) \n\t" 1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lh %[Temp1], 4(%[input]) \n\t" 1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[const_2_power_13], $ac0 \n\t" 1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac0 \n\t" 1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[const_2_power_13], $ac1 \n\t" 1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp2], %[Temp0], %[Temp1] \n\t" 1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sub %[Temp3], %[Temp0], %[Temp1] \n\t" 1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac0, %[Temp2], %[cospi_16_64] \n\t" 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lh %[Temp0], 2(%[input]) \n\t" 1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lh %[Temp1], 6(%[input]) \n\t" 1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[step_0], $ac0, 31 \n\t" 1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[const_2_power_13], $ac0 \n\t" 1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac0 \n\t" 1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac1, %[Temp3], %[cospi_16_64] \n\t" 1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[step_1], $ac1, 31 \n\t" 1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[const_2_power_13], $ac1 \n\t" 1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; 1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step_2 = dct_const_round_shift(temp1); 1605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 1615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac0, %[Temp0], %[cospi_24_64] \n\t" 1625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "msub $ac0, %[Temp1], %[cospi_8_64] \n\t" 1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[step_2], $ac0, 31 \n\t" 1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; 1675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang step_3 = dct_const_round_shift(temp2); 1685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 1695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac1, %[Temp0], %[cospi_8_64] \n\t" 1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "madd $ac1, %[Temp1], %[cospi_24_64] \n\t" 1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[step_3], $ac1, 31 \n\t" 1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[0] = step_0 + step_3; 1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[4] = step_1 + step_2; 1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[8] = step_1 - step_2; 1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[12] = step_0 - step_3; 1785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 1795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp0], %[step_0], %[step_3] \n\t" 1805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addi %[Temp0], %[Temp0], 8 \n\t" 1815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sra %[Temp0], %[Temp0], 4 \n\t" 1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp1], 0(%[dest_pix]) \n\t" 1835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp1], %[Temp1], %[Temp0] \n\t" 1845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp0], %[step_1], %[step_2] \n\t" 1855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[Temp2], %[Temp1](%[cm]) \n\t" 1865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 0(%[dest_pix]) \n\t" 1875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" 1885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addi %[Temp0], %[Temp0], 8 \n\t" 1905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sra %[Temp0], %[Temp0], 4 \n\t" 1915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp1], 0(%[dest_pix]) \n\t" 1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp1], %[Temp1], %[Temp0] \n\t" 1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sub %[Temp0], %[step_1], %[step_2] \n\t" 1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[Temp2], %[Temp1](%[cm]) \n\t" 1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 0(%[dest_pix]) \n\t" 1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" 1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addi %[Temp0], %[Temp0], 8 \n\t" 1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sra %[Temp0], %[Temp0], 4 \n\t" 2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp1], 0(%[dest_pix]) \n\t" 2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp1], %[Temp1], %[Temp0] \n\t" 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sub %[Temp0], %[step_0], %[step_3] \n\t" 2035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[Temp2], %[Temp1](%[cm]) \n\t" 2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 0(%[dest_pix]) \n\t" 2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dest_pix], %[dest_pix], %[dest_stride] \n\t" 2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addi %[Temp0], %[Temp0], 8 \n\t" 2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sra %[Temp0], %[Temp0], 4 \n\t" 2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp1], 0(%[dest_pix]) \n\t" 2105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[Temp1], %[Temp1], %[Temp0] \n\t" 2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[Temp2], %[Temp1](%[cm]) \n\t" 2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 0(%[dest_pix]) \n\t" 2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [Temp0] "=&r" (Temp0), [Temp1] "=&r" (Temp1), 2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [step_0] "=&r" (step_0), [step_1] "=&r" (step_1), 2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [step_2] "=&r" (step_2), [step_3] "=&r" (step_3), 2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [dest_pix] "+r" (dest_pix) 2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [const_2_power_13] "r" (const_2_power_13), 2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cospi_8_64] "r" (cospi_8_64), [cospi_16_64] "r" (cospi_16_64), 2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cospi_24_64] "r" (cospi_24_64), 2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [input] "r" (input), [cm] "r" (cm), [dest_stride] "r" (dest_stride) 2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input += 4; 2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, 2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int dest_stride) { 2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang DECLARE_ALIGNED(32, int16_t, out[4 * 4]); 2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t *outptr = out; 2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t pos = 45; 2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* bit positon for extract from acc */ 2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "wrdsp %[pos], 1 \n\t" 2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : 2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [pos] "r" (pos) 2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // Rows 243b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct4_rows_dspr2(input, outptr); 2445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // Columns 246b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); 2475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 2485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, 2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int dest_stride) { 2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int a1, absa1; 2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int r; 2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t out; 2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int t2, vector_a1, vector_a; 2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t pos = 45; 2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t input_dc = input[0]; 2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* bit positon for extract from acc */ 2595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 2605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "wrdsp %[pos], 1 \n\t" 2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : 2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [pos] "r" (pos) 2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input_dc); 2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addi %[out], %[out], 8 \n\t" 2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sra %[a1], %[out], 4 \n\t" 2705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [out] "+r" (out), [a1] "=r" (a1) 2725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : 2735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (a1 < 0) { 2765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* use quad-byte 2775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * input and output memory are four byte aligned */ 2785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "abs %[absa1], %[a1] \n\t" 2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "replv.qb %[vector_a1], %[absa1] \n\t" 2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [absa1] "=r" (absa1), [vector_a1] "=r" (vector_a1) 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [a1] "r" (a1) 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (r = 4; r--;) { 2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lw %[t2], 0(%[dest]) \n\t" 2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" 2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[vector_a], 0(%[dest]) \n\t" 2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[dest], %[dest], %[dest_stride] \n\t" 2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), 2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [dest] "+&r" (dest) 2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) 2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* use quad-byte 3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * input and output memory are four byte aligned */ 3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "replv.qb %[vector_a1], %[a1] \n\t" 3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [vector_a1] "=r" (vector_a1) 3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [a1] "r" (a1) 3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (r = 4; r--;) { 3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lw %[t2], 0(%[dest]) \n\t" 3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" 3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[vector_a], 0(%[dest]) \n\t" 3125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[dest], %[dest], %[dest_stride] \n\t" 3135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [t2] "=&r" (t2), [vector_a] "=&r" (vector_a), 3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [dest] "+&r" (dest) 3165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) 3175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 3185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 3215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 322b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic void iadst4_dspr2(const int16_t *input, int16_t *output) { 3235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int s0, s1, s2, s3, s4, s5, s6, s7; 3245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int x0, x1, x2, x3; 3255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang x0 = input[0]; 3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang x1 = input[1]; 3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang x2 = input[2]; 3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang x3 = input[3]; 3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (!(x0 | x1 | x2 | x3)) { 3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[0] = output[1] = output[2] = output[3] = 0; 3335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang return; 3345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s0 = sinpi_1_9 * x0; 3375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s1 = sinpi_2_9 * x0; 3385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s2 = sinpi_3_9 * x1; 3395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s3 = sinpi_4_9 * x2; 3405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s4 = sinpi_1_9 * x2; 3415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s5 = sinpi_2_9 * x3; 3425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s6 = sinpi_4_9 * x3; 3435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s7 = x0 - x2 + x3; 3445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang x0 = s0 + s3 + s5; 3465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang x1 = s1 - s4 - s6; 3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang x2 = sinpi_3_9 * s7; 3485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang x3 = s2; 3495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s0 = x0 + x3; 3515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s1 = x1 + x3; 3525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s2 = x2; 3535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang s3 = x0 + x1 - x3; 3545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // 1-D transform scaling factor is sqrt(2). 3565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // The overall dynamic range is 14b (input) + 14b (multiplication scaling) 3575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // + 1b (addition) = 29b. 3585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang // Hence the output bit depth is 15b. 3595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[0] = dct_const_round_shift(s0); 3605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[1] = dct_const_round_shift(s1); 3615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[2] = dct_const_round_shift(s2); 3625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang output[3] = dct_const_round_shift(s3); 3635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 3645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, 3665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int dest_stride, int tx_type) { 3675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int i, j; 3685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang DECLARE_ALIGNED(32, int16_t, out[4 * 4]); 3695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t *outptr = out; 3705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int16_t temp_in[4 * 4], temp_out[4]; 3715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t pos = 45; 3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* bit positon for extract from acc */ 3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "wrdsp %[pos], 1 \n\t" 3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : 3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [pos] "r" (pos) 3785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang switch (tx_type) { 3815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case DCT_DCT: // DCT in both horizontal and vertical 382b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct4_rows_dspr2(input, outptr); 383b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride); 3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case ADST_DCT: // ADST in vertical, DCT in horizontal 386b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct4_rows_dspr2(input, outptr); 3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang outptr = out; 3895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 4; ++i) { 391b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian iadst4_dspr2(outptr, temp_out); 3925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 4; ++j) 3945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * dest_stride + i] = 3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) 3965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * dest_stride + i]); 3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang outptr += 4; 3995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case DCT_ADST: // DCT in vertical, ADST in horizontal 4025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 4; ++i) { 403b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian iadst4_dspr2(input, outptr); 4045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input += 4; 4055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang outptr += 4; 4065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 4; ++i) { 4095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 4; ++j) { 4105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_in[i * 4 + j] = out[j * 4 + i]; 4115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 413b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian vp9_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); 4145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case ADST_ADST: // ADST in both directions 4165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 4; ++i) { 417b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian iadst4_dspr2(input, outptr); 4185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang input += 4; 4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang outptr += 4; 4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 4; ++i) { 4235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 4; ++j) 4245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp_in[j] = out[j * 4 + i]; 425b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian iadst4_dspr2(temp_in, temp_out); 4265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (j = 0; j < 4; ++j) 4285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dest[j * dest_stride + i] = 4295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) 4305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang + dest[j * dest_stride + i]); 4315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 4335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang default: 4345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); 4355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 4385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif // #if HAVE_DSPR2 439