1da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian/* 2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * 4da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian */ 10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <assert.h> 12da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_dsp_rtcd.h" 13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/mips/vpx_convolve_msa.h" 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_8t_and_aver_dst_4x4_msa(const uint8_t *src, 167bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int8_t *filter) { 19da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; 20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 dst0, dst1, dst2, dst3, res2, res3; 21da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 mask0, mask1, mask2, mask3; 22da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt, res0, res1; 23da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask0 = LD_UB(&mc_filt_mask_arr[16]); 25da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= 3; 26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 27da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 29da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); 30da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 31da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask1 = mask0 + 2; 32da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask2 = mask0 + 4; 33da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask3 = mask0 + 6; 34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B4_128_SB(src0, src1, src2, src3); 37da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, 38da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0, filt1, filt2, filt3, res0, res1); 39da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_SH(res0, res1, FILTER_BITS); 41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SAT_SH2_SH(res0, res1, 7); 42da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_UB(res0, res0, res1, res1, res2, res3); 43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); 44da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B2_128_UB(res2, res3); 45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian AVER_UB2_UB(res2, dst0, res3, dst2, res2, res3); 46da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); 47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_8t_and_aver_dst_4x8_msa(const uint8_t *src, 507bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 51da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 52da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int8_t *filter) { 53da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; 54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 mask0, mask1, mask2, mask3, res0, res1, res2, res3; 55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; 56da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt, vec0, vec1, vec2, vec3; 57da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 58da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask0 = LD_UB(&mc_filt_mask_arr[16]); 59da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= 3; 60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 61da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 63da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); 64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask1 = mask0 + 2; 66da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask2 = mask0 + 4; 67da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask3 = mask0 + 6; 68da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 69da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B4_128_SB(src0, src1, src2, src3); 71da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); 73da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, 74da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0, filt1, filt2, filt3, vec0, vec1); 75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 76da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B4_128_SB(src0, src1, src2, src3); 77da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, 78da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0, filt1, filt2, filt3, vec2, vec3); 79da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(vec0, vec1, vec2, vec3, FILTER_BITS); 80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SAT_SH4_SH(vec0, vec1, vec2, vec3, 7); 81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B4_UB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, res0, res1, res2, 82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res3); 83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_D2_UB(res1, res0, res3, res2, res0, res2); 84da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B2_128_UB(res0, res2); 85da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2, dst4, 86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst6); 87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_D2_UB(dst2, dst0, dst6, dst4, dst0, dst4); 88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian AVER_UB2_UB(res0, dst0, res2, dst4, res0, res2); 89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x8_UB(res0, res2, dst, dst_stride); 90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 92da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_8t_and_aver_dst_4w_msa(const uint8_t *src, 937bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 947bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t dst_stride, int8_t *filter, 95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height) { 96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) { 97da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_8t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter); 98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (8 == height) { 99da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_8t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter); 100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_8t_and_aver_dst_8w_msa(const uint8_t *src, 1047bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 1057bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t dst_stride, int8_t *filter, 106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height) { 107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t loop_cnt; 108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; 109da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 mask0, mask1, mask2, mask3, dst0, dst1, dst2, dst3; 110da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt, out0, out1, out2, out3; 111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask0 = LD_UB(&mc_filt_mask_arr[0]); 113da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= 3; 114da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); 118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask1 = mask0 + 2; 120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask2 = mask0 + 4; 121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask3 = mask0 + 6; 122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = (height >> 2); loop_cnt--;) { 124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B4_128_SB(src0, src1, src2, src3); 126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, 128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask3, filt0, filt1, filt2, filt3, out0, out1, 129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2, out3); 130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); 132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SAT_SH4_SH(out0, out1, out2, out3, 7); 1337bc9febe8749e98a3812a0dc4380ceae75c29450Johann CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3, dst, 1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_stride); 135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 139da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_8t_and_aver_dst_16w_msa(const uint8_t *src, 1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 141da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8_t *filter, int32_t height) { 143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t loop_cnt; 144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; 145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 mask0, mask1, mask2, mask3, dst0, dst1; 146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt, out0, out1, out2, out3; 147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; 149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask0 = LD_UB(&mc_filt_mask_arr[0]); 151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= 3; 152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); 156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask1 = mask0 + 2; 158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask2 = mask0 + 4; 159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask3 = mask0 + 6; 160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = height >> 1; loop_cnt--;) { 162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB2(src, src_stride, src0, src2); 163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB2(src + 8, src_stride, src1, src3); 164da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (2 * src_stride); 165da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B4_128_SB(src0, src1, src2, src3); 167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12); 168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13); 169da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, 170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec14); 171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11, 172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec15); 173da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, 174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, 176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec9, vec10, vec11); 177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1, 178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, 180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec9, vec10, vec11); 181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, 182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2, out3); 183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB2(dst, dst_stride, dst0, dst1); 184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); 185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SAT_SH4_SH(out0, out1, out2, out3, 7); 186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_XORI128_AVG_ST_UB(out1, out0, dst0, dst); 187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_XORI128_AVG_ST_UB(out3, out2, dst1, dst); 189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_8t_and_aver_dst_32w_msa(const uint8_t *src, 1947bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 1967bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8_t *filter, int32_t height) { 197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; 199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 dst1, dst2, mask0, mask1, mask2, mask3; 200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt, out0, out1, out2, out3; 201da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 202da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; 203da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask0 = LD_UB(&mc_filt_mask_arr[0]); 205da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= 3; 206da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 207da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 208da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 209da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); 210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask1 = mask0 + 2; 212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask2 = mask0 + 4; 213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask3 = mask0 + 6; 214da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = height; loop_cnt--;) { 216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src0 = LD_SB(src); 217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src2 = LD_SB(src + 16); 218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src3 = LD_SB(src + 24); 219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src1 = __msa_sldi_b(src2, src0, 8); 220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B4_128_SB(src0, src1, src2, src3); 223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12); 224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13); 225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, 226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec14); 227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11, 228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec15); 229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, 230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, 232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec9, vec10, vec11); 233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1, 234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, 236da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec9, vec10, vec11); 237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, 238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2, out3); 239da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); 240da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SAT_SH4_SH(out0, out1, out2, out3, 7); 241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB2(dst, 16, dst1, dst2); 242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, dst); 243da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, dst + 16); 244da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 245da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 246da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_8t_and_aver_dst_64w_msa(const uint8_t *src, 2497bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 250da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 2517bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8_t *filter, int32_t height) { 252da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt, cnt; 253da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; 254da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 dst1, dst2, mask0, mask1, mask2, mask3; 255da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 filt, out0, out1, out2, out3; 256da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 257da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; 258da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 259da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask0 = LD_UB(&mc_filt_mask_arr[0]); 260da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= 3; 261da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 262da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 263da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_SH(filter); 264da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); 265da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 266da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask1 = mask0 + 2; 267da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask2 = mask0 + 4; 268da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask3 = mask0 + 6; 269da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 270da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = height; loop_cnt--;) { 271da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = 0; cnt < 2; ++cnt) { 272da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src0 = LD_SB(&src[cnt << 5]); 273da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src2 = LD_SB(&src[16 + (cnt << 5)]); 274da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src3 = LD_SB(&src[24 + (cnt << 5)]); 275da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src1 = __msa_sldi_b(src2, src0, 8); 276da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 277da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian XORI_B4_128_SB(src0, src1, src2, src3); 278da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, 279da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec12); 280da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, 281da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec13); 282da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, 283da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec14); 284da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11, 285da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec15); 286da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, 287da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec1, vec2, vec3); 288da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, 289da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec9, vec10, vec11); 290da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, 291da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec1, vec2, vec3); 292da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, 293da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec9, vec10, vec11); 294da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, 295da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2, out3); 296da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); 297da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SAT_SH4_SH(out0, out1, out2, out3, 7); 298da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB2(&dst[cnt << 5], 16, dst1, dst2); 299da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, &dst[cnt << 5]); 300da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, &dst[16 + (cnt << 5)]); 301da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 302da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 303da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 304da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 305da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 306da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 307da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 308da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_and_aver_dst_4x4_msa(const uint8_t *src, 3097bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 310da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 311da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int8_t *filter) { 312da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, mask; 313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, dst0, dst1, dst2, dst3, vec0, vec1, res0, res1; 314da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 vec2, vec3, filt; 315da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 316da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&mc_filt_mask_arr[16]); 317da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 318da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 319da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 320da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 321da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 322da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 323da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 324da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); 325da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3); 326da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H2_UH(vec2, vec3, FILTER_BITS); 327da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1); 328da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); 329da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian AVER_UB2_UB(res0, dst0, res1, dst2, res0, res1); 330da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); 331da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 332da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 333da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_and_aver_dst_4x8_msa(const uint8_t *src, 3347bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 335da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 336da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int8_t *filter) { 337da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; 338da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, vec0, vec1, vec2, vec3, res0, res1, res2, res3; 339da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; 340da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 vec4, vec5, vec6, vec7, filt; 341da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 342da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&mc_filt_mask_arr[16]); 343da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 345da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 346da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 347da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 348da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 349da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); 350da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); 351da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3); 352da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5, 353da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec6, vec7); 354da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS); 355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_B4_UB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2, 356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res3); 357da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2, dst4, 358da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst6); 359da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian AVER_UB4_UB(res0, dst0, res1, dst2, res2, dst4, res3, dst6, res0, res1, res2, 360da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res3); 361da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); 362da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 363da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); 364da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 365da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 366da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_and_aver_dst_4w_msa(const uint8_t *src, 3677bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 3687bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t dst_stride, int8_t *filter, 369da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height) { 370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) { 371da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter); 372da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (8 == height) { 373da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter); 374da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 375da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 376da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 377da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_and_aver_dst_8x4_msa(const uint8_t *src, 3787bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 379da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 380da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int8_t *filter) { 381da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, mask; 382da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, dst0, dst1, dst2, dst3; 383da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 vec0, vec1, vec2, vec3, filt; 384da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 385da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&mc_filt_mask_arr[0]); 386da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 387da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 388da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 389da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 390da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 391da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 392da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 393da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 394da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, 395da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 396da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); 397da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 3987bc9febe8749e98a3812a0dc4380ceae75c29450Johann PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, dst, 3997bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_stride); 400da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 401da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 4027bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void common_hz_2t_and_aver_dst_8x8mult_msa( 4037bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, 4047bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8_t *filter, int32_t height) { 405da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, mask; 406da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, dst0, dst1, dst2, dst3; 407da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 vec0, vec1, vec2, vec3, filt; 408da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 409da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&mc_filt_mask_arr[0]); 410da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 411da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 412da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 413da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 414da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 415da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 416da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 417da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 418da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 419da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, 420da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 421da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); 422da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 423da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 424da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 4257bc9febe8749e98a3812a0dc4380ceae75c29450Johann PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, dst, 4267bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_stride); 427da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 428da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 429da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 430da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 431da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, 432da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 433da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); 434da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 4357bc9febe8749e98a3812a0dc4380ceae75c29450Johann PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, dst, 4367bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_stride); 437da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 438da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 439da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (16 == height) { 440da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 441da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 442da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 443da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 444da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 445da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, 446da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 447da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); 448da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 449da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src1, src2, src3); 4507bc9febe8749e98a3812a0dc4380ceae75c29450Johann PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, dst, 4517bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_stride); 452da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 453da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 454da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); 455da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); 456da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, 457da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian vec2, vec3); 458da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); 459da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 4607bc9febe8749e98a3812a0dc4380ceae75c29450Johann PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3, dst, 4617bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_stride); 462da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 463da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 464da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 465da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_and_aver_dst_8w_msa(const uint8_t *src, 4667bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 4677bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t dst_stride, int8_t *filter, 468da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height) { 469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (4 == height) { 470da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, filter); 471da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else { 472da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian common_hz_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride, 473da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filter, height); 474da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 475da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 476da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 477da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_and_aver_dst_16w_msa(const uint8_t *src, 4787bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 479da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 4807bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8_t *filter, int32_t height) { 481da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 482da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; 483da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, dst0, dst1, dst2, dst3; 484da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 485da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 res0, res1, res2, res3, res4, res5, res6, res7, filt; 486da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 487da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&mc_filt_mask_arr[0]); 488da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 489da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 490da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 491da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 492da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 493da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src2, src4, src6); 494da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src + 8, src_stride, src1, src3, src5, src7); 495da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 496da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 497da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); 498da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); 499da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); 500da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); 501da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, 502da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res2, res3); 503da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, 504da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res6, res7); 505da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); 506da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); 507da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 508da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res1, res0, dst0, dst); 509da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 510da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res3, res2, dst1, dst); 511da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 512da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res5, res4, dst2, dst); 513da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 514da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res7, res6, dst3, dst); 515da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 516da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 517da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = (height >> 2) - 1; loop_cnt--;) { 518da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, src_stride, src0, src2, src4, src6); 519da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src + 8, src_stride, src1, src3, src5, src7); 520da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 521da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 522da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); 523da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); 524da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); 525da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); 526da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, 527da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res2, res3); 528da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, 529da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res6, res7); 530da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); 531da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); 532da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); 533da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res1, res0, dst0, dst); 534da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 535da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res3, res2, dst1, dst); 536da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 537da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res5, res4, dst2, dst); 538da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 539da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res7, res6, dst3, dst); 540da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 541da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 542da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 543da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 544da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_and_aver_dst_32w_msa(const uint8_t *src, 5457bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 546da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 5477bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8_t *filter, int32_t height) { 548da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 549da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; 550da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, dst0, dst1, dst2, dst3; 551da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 552da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 res0, res1, res2, res3, res4, res5, res6, res7, filt; 553da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 554da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&mc_filt_mask_arr[0]); 555da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 556da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 557da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 558da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 559da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 560da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = (height >> 1); loop_cnt--;) { 561da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src0 = LD_SB(src); 562da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src2 = LD_SB(src + 16); 563da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src3 = LD_SB(src + 24); 564da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src1 = __msa_sldi_b(src2, src0, 8); 565da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 566da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src4 = LD_SB(src); 567da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src6 = LD_SB(src + 16); 568da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src7 = LD_SB(src + 24); 569da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src5 = __msa_sldi_b(src6, src4, 8); 570da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 571da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 572da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); 573da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); 574da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); 575da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); 576da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, 577da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res2, res3); 578da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, 579da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian res6, res7); 580da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); 581da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); 582da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB2(dst, 16, dst0, dst1); 583da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res1, res0, dst0, dst); 584da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res3, res2, dst1, (dst + 16)); 585da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 586da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB2(dst, 16, dst2, dst3); 587da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res5, res4, dst2, dst); 588da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(res7, res6, dst3, (dst + 16)); 589da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 590da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 591da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 592da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 593da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void common_hz_2t_and_aver_dst_64w_msa(const uint8_t *src, 5947bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 595da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t dst_stride, 5967bc9febe8749e98a3812a0dc4380ceae75c29450Johann int8_t *filter, int32_t height) { 597da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t loop_cnt; 598da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; 599da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 filt0, dst0, dst1, dst2, dst3; 600da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; 601da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt; 602da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 603da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian mask = LD_SB(&mc_filt_mask_arr[0]); 604da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 605da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* rearranging filter */ 606da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt = LD_UH(filter); 607da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); 608da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 609da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = height; loop_cnt--;) { 610da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_SB4(src, 16, src0, src2, src4, src6); 611da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src7 = LD_SB(src + 56); 612da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8); 613da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 614da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 615da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); 616da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); 617da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); 618da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); 619da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, 620da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2, out3); 621da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, 622da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out6, out7); 623da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); 624da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); 625da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(dst, 16, dst0, dst1, dst2, dst3); 626da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(out1, out0, dst0, dst); 627da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(out3, out2, dst1, dst + 16); 628da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(out5, out4, dst2, dst + 32); 629da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian PCKEV_AVG_ST_UB(out7, out6, dst3, dst + 48); 630da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 631da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 632da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 633da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 634da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride, 635da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, ptrdiff_t dst_stride, 636da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const int16_t *filter_x, int x_step_q4, 6377bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_y, int y_step_q4, int w, 6387bc9febe8749e98a3812a0dc4380ceae75c29450Johann int h) { 639da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int8_t cnt, filt_hor[8]; 640da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 641da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(x_step_q4 == 16); 642da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(((const int32_t *)filter_x)[1] != 0x800000); 643da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 644da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = 0; cnt < 8; ++cnt) { 645da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian filt_hor[cnt] = filter_x[cnt]; 646da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 647da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 648da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (((const int32_t *)filter_x)[0] == 0) { 649da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian switch (w) { 650da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 4: 6517bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, 6527bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, &filt_hor[3], h); 653da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 654da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 8: 6557bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_2t_and_aver_dst_8w_msa(src, (int32_t)src_stride, dst, 6567bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, &filt_hor[3], h); 657da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 658da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 16: 6597bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_2t_and_aver_dst_16w_msa(src, (int32_t)src_stride, dst, 6607bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, &filt_hor[3], h); 661da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 662da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 32: 6637bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_2t_and_aver_dst_32w_msa(src, (int32_t)src_stride, dst, 6647bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, &filt_hor[3], h); 665da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 666da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 64: 6677bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_2t_and_aver_dst_64w_msa(src, (int32_t)src_stride, dst, 6687bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, &filt_hor[3], h); 669da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 670da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian default: 6717bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, 6727bc9febe8749e98a3812a0dc4380ceae75c29450Johann x_step_q4, filter_y, y_step_q4, w, h); 673da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 674da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 675da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else { 676da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian switch (w) { 677da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 4: 6787bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_8t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, 6797bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filt_hor, h); 680da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 681da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 8: 6827bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_8t_and_aver_dst_8w_msa(src, (int32_t)src_stride, dst, 6837bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filt_hor, h); 684da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 685da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 16: 6867bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_8t_and_aver_dst_16w_msa(src, (int32_t)src_stride, dst, 6877bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filt_hor, h); 688da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 689da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 32: 6907bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_8t_and_aver_dst_32w_msa(src, (int32_t)src_stride, dst, 6917bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filt_hor, h); 692da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 693da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 64: 6947bc9febe8749e98a3812a0dc4380ceae75c29450Johann common_hz_8t_and_aver_dst_64w_msa(src, (int32_t)src_stride, dst, 6957bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filt_hor, h); 696da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 697da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian default: 6987bc9febe8749e98a3812a0dc4380ceae75c29450Johann vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, 6997bc9febe8749e98a3812a0dc4380ceae75c29450Johann x_step_q4, filter_y, y_step_q4, w, h); 700da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 701da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 702da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 703da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 704