1da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian/* 2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * 4da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian */ 10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/mips/inv_txfm_msa.h" 12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 loc0, loc1, loc2, loc3; 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14; 16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15; 17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 tmp5, tmp6, tmp7; 18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 19da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); 20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian input += 8; 21da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); 22da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 23da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian TRANSPOSE8x8_SH_SH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, 24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); 25da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian TRANSPOSE8x8_SH_SH(reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15, 26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); 27da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); 28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); 29da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2); 30da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); 31da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); 32da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); 33da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14); 34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SUB4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg0, reg12, reg4, 35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg8); 36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg2, reg14, reg6, 37da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg10); 38da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 39da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* stage 2 */ 40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); 41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); 42da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg9 = reg1 - loc2; 44da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg1 = reg1 + loc2; 45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg7 = reg15 - loc3; 46da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg15 = reg15 + loc3; 47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); 49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); 50da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5); 51da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 52da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc1 = reg15 + reg3; 53da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg3 = reg15 - reg3; 54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc2 = reg2 + loc1; 55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg15 = reg2 - loc1; 56da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 57da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc1 = reg1 + reg13; 58da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg13 = reg1 - reg13; 59da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc0 = reg0 + loc1; 60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc1 = reg0 - loc1; 61da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp6 = loc0; 62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp7 = loc1; 63da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg0 = loc2; 64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); 66da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); 67da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 68da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc0 = reg9 + reg5; 69da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg5 = reg9 - reg5; 70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg2 = reg6 + loc0; 71da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg1 = reg6 - loc0; 72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 73da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc0 = reg7 + reg11; 74da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg11 = reg7 - reg11; 75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc1 = reg4 + loc0; 76da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc2 = reg4 - loc0; 77da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp5 = loc1; 78da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 79da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); 80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1); 81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg10 = loc0; 83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg11 = loc1; 84da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 85da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); 86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5); 87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg13 = loc2; 89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* Transpose and store the output */ 91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg12 = tmp5; 92da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg14 = tmp6; 93da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg3 = tmp7; 94da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* transpose block */ 96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian TRANSPOSE8x8_SH_SH(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14, 97da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14); 98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_SH8(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14, output, 16); 99da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* transpose block */ 101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian TRANSPOSE8x8_SH_SH(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, 102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15); 103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_SH8(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, (output + 8), 16); 104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, 107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride) { 108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 loc0, loc1, loc2, loc3; 109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14; 110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15; 111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 tmp5, tmp6, tmp7; 112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* load up 8x8 */ 114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); 115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian input += 8 * 16; 116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* load bottom 8x8 */ 117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); 118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); 120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); 121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2); 122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); 123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); 124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); 125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14); 126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg0 = reg2 - loc1; 128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg2 = reg2 + loc1; 129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg12 = reg14 - loc0; 130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg14 = reg14 + loc0; 131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg4 = reg6 - loc3; 132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg6 = reg6 + loc3; 133da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg8 = reg10 - loc2; 134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg10 = reg10 + loc2; 135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* stage 2 */ 137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); 138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); 139da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 140da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg9 = reg1 - loc2; 141da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg1 = reg1 + loc2; 142da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg7 = reg15 - loc3; 143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg15 = reg15 + loc3; 144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); 146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); 147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5); 148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc1 = reg15 + reg3; 150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg3 = reg15 - reg3; 151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc2 = reg2 + loc1; 152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg15 = reg2 - loc1; 153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc1 = reg1 + reg13; 155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg13 = reg1 - reg13; 156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc0 = reg0 + loc1; 157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc1 = reg0 - loc1; 158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp6 = loc0; 159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp7 = loc1; 160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg0 = loc2; 161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); 163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); 164da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 165da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc0 = reg9 + reg5; 166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg5 = reg9 - reg5; 167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg2 = reg6 + loc0; 168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg1 = reg6 - loc0; 169da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc0 = reg7 + reg11; 171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg11 = reg7 - reg11; 172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc1 = reg4 + loc0; 173da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loc2 = reg4 - loc0; 174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp5 = loc1; 175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); 177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1); 178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg10 = loc0; 180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg11 = loc1; 181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); 183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5); 184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg13 = loc2; 185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* Transpose and store the output */ 187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg12 = tmp5; 188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg14 = tmp6; 189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian reg3 = tmp7; 190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(reg0, reg2, reg4, reg6, 6); 192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6); 193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(reg8, reg10, reg12, reg14, 6); 195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14); 196da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(reg3, reg13, reg11, reg5, 6); 198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5); 199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(reg7, reg9, reg1, reg15, 6); 201da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15); 202da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 203da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst, 205da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride) { 206da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t i; 207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]); 208da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16_t *out = out_arr; 209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* transform rows */ 211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (i = 0; i < 2; ++i) { 212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* process 16 * 8 block */ 213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vpx_idct16_1d_rows_msa((input + (i << 7)), (out + (i << 7))); 214da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* transform columns */ 217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (i = 0; i < 2; ++i) { 218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* process 8 * 16 block */ 219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), 220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst_stride); 221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst, 225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride) { 226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t i; 227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]); 228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16_t *out = out_arr; 229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* process 16 * 8 block */ 231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vpx_idct16_1d_rows_msa(input, out); 232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* short case just considers top 4 rows as valid output */ 234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out += 4 * 16; 235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (i = 12; i--;) { 236da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian __asm__ __volatile__ ( 237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sw $zero, 0(%[out]) \n\t" 238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sw $zero, 4(%[out]) \n\t" 239da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sw $zero, 8(%[out]) \n\t" 240da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sw $zero, 12(%[out]) \n\t" 241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sw $zero, 16(%[out]) \n\t" 242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sw $zero, 20(%[out]) \n\t" 243da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sw $zero, 24(%[out]) \n\t" 244da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian "sw $zero, 28(%[out]) \n\t" 245da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 246da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : 247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian : [out] "r" (out) 248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ); 249da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 250da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out += 16; 251da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 252da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 253da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out = out_arr; 254da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 255da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* transform columns */ 256da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (i = 0; i < 2; ++i) { 257da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* process 8 * 16 block */ 258da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), 259da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst_stride); 260da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 261da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 262da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 263da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst, 264da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride) { 265da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t i; 266da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int16_t out; 267da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 vec, res0, res1, res2, res3, res4, res5, res6, res7; 268da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3; 269da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 270da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); 271da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); 272da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out = ROUND_POWER_OF_TWO(out, 6); 273da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 274da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec = __msa_fill_h(out); 275da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 276da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (i = 4; i--;) { 277da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 278da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian UNPCK_UB_SH(dst0, res0, res4); 279da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian UNPCK_UB_SH(dst1, res1, res5); 280da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian UNPCK_UB_SH(dst2, res2, res6); 281da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian UNPCK_UB_SH(dst3, res3, res7); 282da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3); 283da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD4(res4, vec, res5, vec, res6, vec, res7, vec, res4, res5, res6, res7); 284da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH4_0_255(res0, res1, res2, res3); 285da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH4_0_255(res4, res5, res6, res7); 286da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B4_UB(res4, res0, res5, res1, res6, res2, res7, res3, 287da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp0, tmp1, tmp2, tmp3); 288da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); 289da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 290da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 291da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 292da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 293da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) { 294da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; 295da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; 296da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 297da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* load input data */ 298da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SH16(input, 8, 299da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14, l7, l15); 300da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, 301da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l0, l1, l2, l3, l4, l5, l6, l7); 302da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, 303da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l8, l9, l10, l11, l12, l13, l14, l15); 304da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 305da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* ADST in horizontal */ 306da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP9_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7, 307da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l8, l9, l10, l11, l12, l13, l14, l15, 308da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r0, r1, r2, r3, r4, r5, r6, r7, 309da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r8, r9, r10, r11, r12, r13, r14, r15); 310da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 311da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l1 = -r8; 312da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l3 = -r4; 313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l13 = -r13; 314da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l15 = -r1; 315da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 316da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian TRANSPOSE8x8_SH_SH(r0, l1, r12, l3, r6, r14, r10, r2, 317da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l0, l1, l2, l3, l4, l5, l6, l7); 318da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_SH8(l0, l1, l2, l3, l4, l5, l6, l7, output, 16); 319da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian TRANSPOSE8x8_SH_SH(r3, r11, r15, r7, r5, l13, r9, l15, 320da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian l8, l9, l10, l11, l12, l13, l14, l15); 321da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_SH8(l8, l9, l10, l11, l12, l13, l14, l15, (output + 8), 16); 322da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 323da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 324da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, 325da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride) { 326da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 v0, v2, v4, v6, k0, k1, k2, k3; 327da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; 328da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 out0, out1, out2, out3, out4, out5, out6, out7; 329da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 out8, out9, out10, out11, out12, out13, out14, out15; 330da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 g0, g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, g11, g12, g13, g14, g15; 331da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11; 332da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 res0, res1, res2, res3, res4, res5, res6, res7; 333da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 res8, res9, res10, res11, res12, res13, res14, res15; 334da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; 335da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; 336da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 zero = { 0 }; 337da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 338da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r0 = LD_SH(input + 0 * 16); 339da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r3 = LD_SH(input + 3 * 16); 340da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r4 = LD_SH(input + 4 * 16); 341da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r7 = LD_SH(input + 7 * 16); 342da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r8 = LD_SH(input + 8 * 16); 343da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r11 = LD_SH(input + 11 * 16); 344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r12 = LD_SH(input + 12 * 16); 345da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r15 = LD_SH(input + 15 * 16); 346da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 347da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* stage 1 */ 348da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k0 = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64); 349da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k1 = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); 350da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k2 = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); 351da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k3 = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); 352da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); 353da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k0 = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); 354da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k1 = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); 355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k2 = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); 356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k3 = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); 357da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); 358da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(g0, g2, g10, g8, h8, h9, v2, v0); 359da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k0 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); 360da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k1 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); 361da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k2 = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); 362da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); 363da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 364da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r1 = LD_SH(input + 1 * 16); 365da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r2 = LD_SH(input + 2 * 16); 366da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r5 = LD_SH(input + 5 * 16); 367da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r6 = LD_SH(input + 6 * 16); 368da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r9 = LD_SH(input + 9 * 16); 369da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r10 = LD_SH(input + 10 * 16); 370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r13 = LD_SH(input + 13 * 16); 371da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian r14 = LD_SH(input + 14 * 16); 372da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 373da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k0 = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64); 374da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k1 = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); 375da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k2 = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); 376da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k3 = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); 377da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7); 378da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k0 = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); 379da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k1 = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); 380da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k2 = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); 381da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k3 = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); 382da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15); 383da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(g4, g6, g14, g12, h10, h11, v6, v4); 384da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(h8, h9, h11, h10, out0, out1, h11, h10); 385da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out1 = -out1; 386da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(out0, out1, 6); 387da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst0 = LD_UB(dst + 0 * dst_stride); 388da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst1 = LD_UB(dst + 15 * dst_stride); 389da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_SH(zero, dst0, zero, dst1, res0, res1); 390da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD2(res0, out0, res1, out1, res0, res1); 391da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH2_0_255(res0, res1); 392da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SH(res0, res0, res1, res1, res0, res1); 393da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res0, dst); 394da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res1, dst + 15 * dst_stride); 395da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 396da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k0 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); 397da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k1 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); 398da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k2 = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); 399da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); 400da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10); 401da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out8 = -out8; 402da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 403da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(out8, out9, 6); 404da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst8 = LD_UB(dst + 1 * dst_stride); 405da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst9 = LD_UB(dst + 14 * dst_stride); 406da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_SH(zero, dst8, zero, dst9, res8, res9); 407da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD2(res8, out8, res9, out9, res8, res9); 408da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH2_0_255(res8, res9); 409da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SH(res8, res8, res9, res9, res8, res9); 410da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res8, dst + dst_stride); 411da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res9, dst + 14 * dst_stride); 412da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 413da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k0 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); 414da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k1 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); 415da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k2 = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); 416da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7); 417da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out4 = -out4; 418da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(out4, out5, 6); 419da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst4 = LD_UB(dst + 3 * dst_stride); 420da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst5 = LD_UB(dst + 12 * dst_stride); 421da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_SH(zero, dst4, zero, dst5, res4, res5); 422da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD2(res4, out4, res5, out5, res4, res5); 423da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH2_0_255(res4, res5); 424da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SH(res4, res4, res5, res5, res4, res5); 425da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res4, dst + 3 * dst_stride); 426da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res5, dst + 12 * dst_stride); 427da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 428da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); 429da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out13 = -out13; 430da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(out12, out13, 6); 431da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst12 = LD_UB(dst + 2 * dst_stride); 432da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst13 = LD_UB(dst + 13 * dst_stride); 433da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_SH(zero, dst12, zero, dst13, res12, res13); 434da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD2(res12, out12, res13, out13, res12, res13); 435da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH2_0_255(res12, res13); 436da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SH(res12, res12, res13, res13, res12, res13); 437da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res12, dst + 2 * dst_stride); 438da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res13, dst + 13 * dst_stride); 439da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 440da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); 441da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k3 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); 442da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_SHORT(out6, out7, k0, k3, out6, out7); 443da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(out6, out7, 6); 444da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst6 = LD_UB(dst + 4 * dst_stride); 445da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst7 = LD_UB(dst + 11 * dst_stride); 446da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_SH(zero, dst6, zero, dst7, res6, res7); 447da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD2(res6, out6, res7, out7, res6, res7); 448da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH2_0_255(res6, res7); 449da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SH(res6, res6, res7, res7, res6, res7); 450da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res6, dst + 4 * dst_stride); 451da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res7, dst + 11 * dst_stride); 452da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 453da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_SHORT(out10, out11, k0, k3, out10, out11); 454da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(out10, out11, 6); 455da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst10 = LD_UB(dst + 6 * dst_stride); 456da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst11 = LD_UB(dst + 9 * dst_stride); 457da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_SH(zero, dst10, zero, dst11, res10, res11); 458da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD2(res10, out10, res11, out11, res10, res11); 459da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH2_0_255(res10, res11); 460da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SH(res10, res10, res11, res11, res10, res11); 461da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res10, dst + 6 * dst_stride); 462da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res11, dst + 9 * dst_stride); 463da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 464da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); 465da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian k2 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); 466da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_SHORT(h10, h11, k1, k2, out2, out3); 467da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(out2, out3, 6); 468da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst2 = LD_UB(dst + 7 * dst_stride); 469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst3 = LD_UB(dst + 8 * dst_stride); 470da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_SH(zero, dst2, zero, dst3, res2, res3); 471da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD2(res2, out2, res3, out3, res2, res3); 472da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH2_0_255(res2, res3); 473da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SH(res2, res2, res3, res3, res2, res3); 474da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res2, dst + 7 * dst_stride); 475da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res3, dst + 8 * dst_stride); 476da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 477da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian MADD_SHORT(out14, out15, k1, k2, out14, out15); 478da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(out14, out15, 6); 479da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst14 = LD_UB(dst + 5 * dst_stride); 480da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst15 = LD_UB(dst + 10 * dst_stride); 481da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_SH(zero, dst14, zero, dst15, res14, res15); 482da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADD2(res14, out14, res15, out15, res14, res15); 483da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian CLIP_SH2_0_255(res14, res15); 484da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SH(res14, res14, res15, res15, res14, res15); 485da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res14, dst + 5 * dst_stride); 486da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x1_UB(res15, dst + 10 * dst_stride); 487da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 488