1da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian/* 2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * 4da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian */ 10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vp8_rtcd.h" 12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_ports/mem.h" 13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vp8/common/filter.h" 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vp8/common/mips/msa/vp8_macros_msa.h" 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh VenkatasubramanianDECLARE_ALIGNED(16, static const int8_t, vp8_bilinear_filters_msa[7][2]) = 17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 112, 16 }, 19da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 96, 32 }, 20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 80, 
48 }, 21da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 64, 64 }, 22da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 48, 80 }, 23da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 32, 96 }, 24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 16, 112 } 25da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}; 26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 27da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic const uint8_t vp8_mc_filt_mask_arr[16 * 3] = 28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 29da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* 8 width cases */ 30da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 31da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* 4 width cases */ 32da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20, 33da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* 4 width cases */ 34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28 35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}; 36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 37da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, 38da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 39da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter) 40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, mask; 
42da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, vec0, vec1, res0, res1; 43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 vec2, vec3, filt; 44da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[16]); 46da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 50da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 51da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); 52da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3); 53da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(vec2, vec3, VP8_FILTER_SHIFT); 54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1); 55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); 56da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 57da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 58da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride, 59da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter) 61da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 
62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 vec0, vec1, vec2, vec3, filt0; 63da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; 64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 res0, res1, res2, res3; 65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 vec4, vec5, vec6, vec7, filt; 66da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 67da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[16]); 68da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 69da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 71da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 73da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); 74da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3); 75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 76da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec4, vec5, vec6, vec7); 77da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT); 78da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, 79da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res0, res1, res2, res3); 80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); 81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); 83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 84da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 85da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, 86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter, int32_t height) 88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) 90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter); 92da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 93da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else if (8 == height) 94da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter); 96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 97da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 99da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, 100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 
101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter) 102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0; 104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, mask; 105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 vec0, vec1, vec2, vec3, filt; 106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[0]); 108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0, vec1, vec2, vec3); 117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); 118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1); 119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(src0, src1, dst, dst_stride); 120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian} 121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride, 123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter, int32_t height) 125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0; 127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, mask, out0, out1; 128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 vec0, vec1, vec2, vec3, filt; 129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[0]); 131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 133da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 139da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 140da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, 
vec1, vec2, vec3, filt0, filt0, filt0, filt0, 141da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0, vec1, vec2, vec3); 142da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); 143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); 148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0, vec1, vec2, vec3); 155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); 156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); 157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 
159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (16 == height) 161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 164da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 165da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0, vec1, vec2, vec3); 169da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); 170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 173da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); 174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 
178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0, vec1, vec2, vec3); 180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); 181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); 182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride); 183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, 187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter, int32_t height) 189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) 191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter); 193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 196da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height); 197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 
198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, 201da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 202da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter, int32_t height) 203da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 205da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; 206da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt; 208da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[0]); 210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian loop_cnt = (height >> 2) - 1; 212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 214da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src2, src4, src6); 217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src + 8, src_stride, src1, src3, src5, src7); 
218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); 221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); 222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); 223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); 224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out0, out1, out2, out3); 226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, 227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out4, out5, out6, out7); 228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT); 229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT); 230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(out0, out1, dst); 231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(out2, out3, dst); 233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(out4, out5, dst); 235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 
236da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(out6, out7, dst); 237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 239da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (; loop_cnt--;) 240da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src2, src4, src6); 242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src + 8, src_stride, src1, src3, src5, src7); 243da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 244da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 245da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); 246da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); 247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); 248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); 249da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 250da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out0, out1, out2, out3); 251da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, 252da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out4, out5, out6, out7); 253da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT); 254da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT); 255da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(out0, out1, dst); 256da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 257da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(out2, out3, dst); 258da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 259da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(out4, out5, dst); 260da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 261da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(out6, out7, dst); 262da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 263da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 264da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 265da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 266da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_vt_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, 267da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 268da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter) 269da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 270da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4; 271da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332; 272da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0; 273da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt; 274da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 tmp0, tmp1; 
275da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 276da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 277da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h(filt, 0); 278da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 279da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB5(src, src_stride, src0, src1, src2, src3, src4); 280da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (5 * src_stride); 281da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 282da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, 283da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src10_r, src21_r, src32_r, src43_r); 284da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); 285da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); 286da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT); 287da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); 288da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); 289da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 290da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 291da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_vt_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride, 292da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 293da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter) 
294da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 295da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 296da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r; 297da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776; 298da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 tmp0, tmp1, tmp2, tmp3; 299da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0; 300da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt; 301da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 302da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 303da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h(filt, 0); 304da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 305da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 306da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (8 * src_stride); 307da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 308da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src8 = LD_SB(src); 309da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 310da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 311da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, 312da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src32_r, src43_r); 313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, 
src65_r, 314da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src76_r, src87_r); 315da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, 316da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src87_r, src76_r, src2110, src4332, src6554, src8776); 317da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0, 318da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp0, tmp1, tmp2, tmp3); 319da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); 320da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332); 321da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); 322da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride); 323da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 324da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 325da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_vt_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, 326da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 327da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter, int32_t height) 328da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 329da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) 330da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 331da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter); 
332da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 333da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else if (8 == height) 334da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 335da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter); 336da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 337da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 338da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 339da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_vt_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, 340da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 341da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter) 342da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 343da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0; 344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 out0, out1; 345da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 tmp0, tmp1, tmp2, tmp3; 346da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt; 347da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 348da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 349da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h(filt, 0); 350da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 351da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB5(src, src_stride, src0, src1, src2, src3, src4); 352da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1); 
353da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3); 354da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp0, tmp1, tmp2, tmp3); 356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); 357da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); 358da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 359da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 360da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 361da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_vt_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride, 362da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 363da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter, int32_t height) 364da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 365da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 366da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; 367da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; 368da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 out0, out1; 369da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 tmp0, tmp1, tmp2, tmp3; 370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt; 371da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 
372da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 373da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h(filt, 0); 374da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 375da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src0 = LD_UB(src); 376da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 377da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 378da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = (height >> 3); loop_cnt--;) 379da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 380da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8); 381da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (8 * src_stride); 382da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 383da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, 384da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0, vec1, vec2, vec3); 385da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, 386da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec4, vec5, vec6, vec7); 387da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, 388da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp0, tmp1, tmp2, tmp3); 389da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); 390da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); 391da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 392da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 393da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 394da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, 395da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp0, tmp1, tmp2, tmp3); 396da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); 397da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); 398da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 399da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 400da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 401da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src0 = src8; 402da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 403da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 404da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 405da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_vt_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, 406da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 407da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter, int32_t height) 408da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 409da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) 410da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 411da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter); 
412da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 413da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 414da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 415da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, 416da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian height); 417da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 418da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 419da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 420da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_vt_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, 421da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 422da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter, int32_t height) 423da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 424da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 425da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 src0, src1, src2, src3, src4; 426da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; 427da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 tmp0, tmp1, tmp2, tmp3; 428da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt; 429da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 430da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 431da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h(filt, 0); 432da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 433da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian src0 = LD_UB(src); 434da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 435da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 436da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = (height >> 2); loop_cnt--;) 437da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 438da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src1, src2, src3, src4); 439da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 440da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 441da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); 442da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); 443da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); 444da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT); 445da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(tmp0, tmp1, dst); 446da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 447da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 448da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); 449da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); 450da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); 451da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT); 452da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(tmp2, tmp3, dst); 
453da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 454da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 455da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); 456da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT); 457da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(tmp0, tmp1, dst); 458da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 459da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 460da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); 461da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT); 462da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(tmp2, tmp3, dst); 463da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 464da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 465da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src0 = src4; 466da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 467da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 468da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hv_2ht_2vt_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, 470da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 471da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_horiz, 472da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_vert) 473da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 
474da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, mask; 475da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1; 476da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1; 477da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 478da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[16]); 479da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 480da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter_horiz); 481da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); 482da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter_vert); 483da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); 484da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 485da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB5(src, src_stride, src0, src1, src2, src3, src4); 486da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT); 487da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT); 488da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT); 489da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); 490da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); 
491da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 492da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); 493da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); 494da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT); 495da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1); 496da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); 497da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 498da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 499da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hv_2ht_2vt_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride, 500da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 501da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_horiz, 502da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_vert) 503da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 504da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask; 505da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 res0, res1, res2, res3; 506da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3; 507da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; 508da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt; 
509da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 510da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[16]); 511da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 512da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter_horiz); 513da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); 514da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter_vert); 515da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); 516da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 517da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 518da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (8 * src_stride); 519da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src8 = LD_SB(src); 520da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 521da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT); 522da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT); 523da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, VP8_FILTER_SHIFT); 524da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, VP8_FILTER_SHIFT); 525da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, VP8_FILTER_SHIFT); 526da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, 
hz_out2, hz_out4, hz_out1, 527da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out3, hz_out5, 8); 528da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6); 529da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 530da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); 531da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3); 532da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt, 533da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec4, vec5, vec6, vec7); 534da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT); 535da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, 536da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res0, res1, res2, res3); 537da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); 538da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 539da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); 540da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 541da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 542da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hv_2ht_2vt_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, 543da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 544da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t 
*filter_horiz, 545da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_vert, 546da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height) 547da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 548da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) 549da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 550da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride, 551da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filter_horiz, filter_vert); 552da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 553da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else if (8 == height) 554da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 555da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride, 556da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filter_horiz, filter_vert); 557da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 558da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 559da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 560da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hv_2ht_2vt_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, 561da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 562da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_horiz, 563da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_vert) 564da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 565da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, mask, out0, out1; 
566da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3; 567da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3; 568da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt; 569da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 570da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[0]); 571da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 572da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter_horiz); 573da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_hz = (v16u8)__msa_splati_h(filt, 0); 574da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter_vert); 575da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_vt = (v16u8)__msa_splati_h(filt, 0); 576da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 577da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB5(src, src_stride, src0, src1, src2, src3, src4); 578da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 579da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT); 580da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT); 581da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 582da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp0 = __msa_dotp_u_h(vec0, filt_vt); 583da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 584da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, VP8_FILTER_SHIFT); 
585da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 586da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp1 = __msa_dotp_u_h(vec1, filt_vt); 587da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 588da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, VP8_FILTER_SHIFT); 589da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 590da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp2 = __msa_dotp_u_h(vec2, filt_vt); 591da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 592da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT); 593da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 594da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp3 = __msa_dotp_u_h(vec3, filt_vt); 595da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 596da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); 597da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); 598da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 599da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 600da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 601da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hv_2ht_2vt_8x8mult_msa(uint8_t *RESTRICT src, 602da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t src_stride, 603da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian uint8_t *RESTRICT dst, 604da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 605da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_horiz, 606da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_vert, 607da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height) 608da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 609da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 610da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, mask, out0, out1; 611da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt_hz, filt_vt, vec0; 612da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; 613da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt; 614da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 615da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[0]); 616da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 617da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter_horiz); 618da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_hz = (v16u8)__msa_splati_h(filt, 0); 619da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter_vert); 620da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_vt = (v16u8)__msa_splati_h(filt, 0); 621da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 622da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src0 = LD_SB(src); 623da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 624da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian 625da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT); 626da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 627da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = (height >> 3); loop_cnt--;) 628da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 629da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src1, src2, src3, src4); 630da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 631da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 632da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 633da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 634da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 635da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp1 = __msa_dotp_u_h(vec0, filt_vt); 636da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 637da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 638da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 639da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 640da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp2 = __msa_dotp_u_h(vec0, filt_vt); 641da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 642da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); 643da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 644da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 645da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 646da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 647da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp3 = __msa_dotp_u_h(vec0, filt_vt); 648da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 649da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 650da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 651da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src1, src2, src3, src4); 652da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 653da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 654da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp4 = __msa_dotp_u_h(vec0, filt_vt); 655da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 656da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp3, tmp4, VP8_FILTER_SHIFT); 657da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1); 658da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 659da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 660da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 661da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 662da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 663da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 664da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp5 = __msa_dotp_u_h(vec0, filt_vt); 665da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 666da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 667da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 668da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 669da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp6 = __msa_dotp_u_h(vec0, filt_vt); 670da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 671da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 672da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 673da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); 674da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp7 = __msa_dotp_u_h(vec0, filt_vt); 675da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 676da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 677da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 678da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); 679da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp8 = __msa_dotp_u_h(vec0, filt_vt); 680da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 681da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, VP8_FILTER_SHIFT); 
682da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1); 683da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST8x4_UB(out0, out1, dst, dst_stride); 684da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 685da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 686da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 687da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 688da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hv_2ht_2vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, 689da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 690da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_horiz, 691da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_vert, 692da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height) 693da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 694da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) 695da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 696da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride, 697da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filter_horiz, filter_vert); 698da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 699da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 700da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 701da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride, 702da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filter_horiz, filter_vert, height); 
703da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 704da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 705da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 706da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hv_2ht_2vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, 707da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride, 708da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_horiz, 709da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *filter_vert, 710da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height) 711da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 712da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 713da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; 714da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt_hz, filt_vt, vec0, vec1; 715da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3; 716da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt; 717da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 718da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&vp8_mc_filt_mask_arr[0]); 719da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 720da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 721da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter_horiz); 722da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_hz = (v16u8)__msa_splati_h(filt, 0); 723da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = 
LD_SH(filter_vert); 724da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_vt = (v16u8)__msa_splati_h(filt, 0); 725da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 726da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB2(src, 8, src0, src1); 727da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 728da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 729da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT); 730da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT); 731da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 732da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = (height >> 2); loop_cnt--;) 733da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 734da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src2, src4, src6); 735da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src + 8, src_stride, src1, src3, src5, src7); 736da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 737da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 738da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 739da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 740da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 741da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 742da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); 
743da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); 744da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); 745da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(tmp1, tmp2, dst); 746da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 747da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 748da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 749da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 750da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 751da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 752da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); 753da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); 754da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); 755da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(tmp1, tmp2, dst); 756da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 757da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 758da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 759da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 760da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, 761da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 
762da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); 763da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); 764da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); 765da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(tmp1, tmp2, dst); 766da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 767da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 768da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, 769da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 770da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, 771da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VP8_FILTER_SHIFT); 772da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); 773da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); 774da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); 775da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_ST_SB(tmp1, tmp2, dst); 776da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 777da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 778da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 779da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 780da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vp8_bilinear_predict4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, 
781da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t xoffset, int32_t yoffset, 782da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride) 783da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 784da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1]; 785da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1]; 786da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 787da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (yoffset) 788da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 789da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (xoffset) 790da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 791da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hv_2ht_2vt_4w_msa(src, src_stride, dst, dst_stride, 792da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian h_filter, v_filter, 4); 793da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 794da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 795da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 796da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_vt_2t_4w_msa(src, src_stride, dst, dst_stride, v_filter, 4); 797da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 798da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 799da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 800da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 801da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (xoffset) 802da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 
803da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_4w_msa(src, src_stride, dst, dst_stride, h_filter, 4); 804da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 805da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 806da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 807da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t tp0, tp1, tp2, tp3; 808da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 809da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LW4(src, src_stride, tp0, tp1, tp2, tp3); 810da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SW4(tp0, tp1, tp2, tp3, dst, dst_stride); 811da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 812da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 813da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 814da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 815da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vp8_bilinear_predict8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, 816da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t xoffset, int32_t yoffset, 817da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *RESTRICT dst, int32_t dst_stride) 818da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian{ 819da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1]; 820da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1]; 821da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 822da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (yoffset) 823da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 
824da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (xoffset) 825da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 826da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride, 827da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian h_filter, v_filter, 4); 828da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 829da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 830da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 831da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 4); 832da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 833da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 834da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 835da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 836da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (xoffset) 837da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 838da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 4); 839da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 840da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian else 841da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian { 842da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vp8_copy_mem8x4(src, src_stride, dst, dst_stride); 843da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 844da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 845da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 846da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 847da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh 
/* Bilinear prediction of an 8x8 block.
 *
 * xoffset / yoffset select the horizontal / vertical tap pair as row
 * (offset - 1) of vp8_bilinear_filters_msa; an offset of 0 means no filtering
 * in that direction. The table has seven rows, so non-zero offsets must lie
 * in 1..7. With both offsets 0 the block is copied by vp8_copy_mem8x8.
 *
 * The table is indexed only inside the branch where the offset is non-zero:
 * indexing with (0 - 1) would form a pointer in front of the table, which is
 * undefined behaviour even if the pointer is never dereferenced.
 */
void vp8_bilinear_predict8x8_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                 int32_t xoffset, int32_t yoffset,
                                 uint8_t *RESTRICT dst, int32_t dst_stride)
{
    if (yoffset)
    {
        const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];

        if (xoffset)
        {
            const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];

            common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride,
                                     h_filter, v_filter, 8);
        }
        else
        {
            common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 8);
        }
    }
    else
    {
        if (xoffset)
        {
            const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];

            common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 8);
        }
        else
        {
            vp8_copy_mem8x8(src, src_stride, dst, dst_stride);
        }
    }
}

/* Bilinear prediction of a 16x16 block.
 *
 * xoffset / yoffset select the horizontal / vertical tap pair as row
 * (offset - 1) of vp8_bilinear_filters_msa; an offset of 0 means no filtering
 * in that direction. The table has seven rows, so non-zero offsets must lie
 * in 1..7. With both offsets 0 the block is copied by vp8_copy_mem16x16.
 *
 * The table is indexed only inside the branch where the offset is non-zero:
 * indexing with (0 - 1) would form a pointer in front of the table, which is
 * undefined behaviour even if the pointer is never dereferenced.
 */
void vp8_bilinear_predict16x16_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                   int32_t xoffset, int32_t yoffset,
                                   uint8_t *RESTRICT dst, int32_t dst_stride)
{
    if (yoffset)
    {
        const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1];

        if (xoffset)
        {
            const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];

            common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride,
                                      h_filter, v_filter, 16);
        }
        else
        {
            common_vt_2t_16w_msa(src, src_stride, dst, dst_stride, v_filter,
                                 16);
        }
    }
    else
    {
        if (xoffset)
        {
            const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1];

            common_hz_2t_16w_msa(src, src_stride, dst, dst_stride, h_filter,
                                 16);
        }
        else
        {
            vp8_copy_mem16x16(src, src_stride, dst, dst_stride);
        }
    }
}