1da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian/* 2da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * 4da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * Use of this source code is governed by a BSD-style license 5da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * that can be found in the LICENSE file in the root of the source 6da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * tree. An additional intellectual property rights grant can be found 7da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * in the file PATENTS. All contributing project authors may 8da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian * be found in the AUTHORS file in the root of the source tree. 9da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian */ 10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <string.h> 12df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#include "./vpx_dsp_rtcd.h" 13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/mips/macros_msa.h" 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_width8_msa(const uint8_t *src, int32_t src_stride, 16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, int32_t dst_stride, int32_t height) { 17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t cnt; 18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint64_t out0, out1, out2, out3, out4, out5, out6, out7; 19da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 21da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (0 == height % 12) { 22da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = (height / 12); cnt--;) { 23da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (8 * src_stride); 25da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out0 = __msa_copy_u_d((v2i64)src0, 0); 27da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out1 = __msa_copy_u_d((v2i64)src1, 0); 28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2 = __msa_copy_u_d((v2i64)src2, 0); 29da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out3 = __msa_copy_u_d((v2i64)src3, 0); 30da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out4 = __msa_copy_u_d((v2i64)src4, 0); 31da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out5 = __msa_copy_u_d((v2i64)src5, 0); 32da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out6 = __msa_copy_u_d((v2i64)src6, 0); 33da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out7 = __msa_copy_u_d((v2i64)src7, 0); 34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SD4(out0, out1, out2, out3, dst, dst_stride); 36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 37da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SD4(out4, out5, out6, out7, dst, dst_stride); 38da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 39da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src0, src1, src2, src3); 41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 42da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out0 = __msa_copy_u_d((v2i64)src0, 0); 44da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out1 = __msa_copy_u_d((v2i64)src1, 0); 45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2 = __msa_copy_u_d((v2i64)src2, 0); 46da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out3 = __msa_copy_u_d((v2i64)src3, 0); 47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SD4(out0, out1, out2, out3, dst, dst_stride); 48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 50da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (0 == height % 8) { 51da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = height >> 3; cnt--;) { 52da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 53da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (8 * src_stride); 54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out0 = __msa_copy_u_d((v2i64)src0, 0); 56da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out1 = __msa_copy_u_d((v2i64)src1, 0); 57da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2 = __msa_copy_u_d((v2i64)src2, 0); 58da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out3 = __msa_copy_u_d((v2i64)src3, 0); 59da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out4 = __msa_copy_u_d((v2i64)src4, 0); 60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out5 = __msa_copy_u_d((v2i64)src5, 0); 61da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out6 = __msa_copy_u_d((v2i64)src6, 0); 62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out7 = __msa_copy_u_d((v2i64)src7, 0); 63da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SD4(out0, out1, out2, out3, dst, dst_stride); 65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 66da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SD4(out4, out5, out6, out7, dst, dst_stride); 67da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 68da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 69da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (0 == height % 4) { 70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = (height / 4); cnt--;) { 71da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src0, src1, src2, src3); 72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 73da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out0 = __msa_copy_u_d((v2i64)src0, 0); 74da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out1 = __msa_copy_u_d((v2i64)src1, 0); 75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out2 = __msa_copy_u_d((v2i64)src2, 0); 76da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out3 = __msa_copy_u_d((v2i64)src3, 0); 77da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 78da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SD4(out0, out1, out2, out3, dst, dst_stride); 79da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (0 == height % 2) { 82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = (height / 2); cnt--;) { 83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB2(src, src_stride, src0, src1); 84da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (2 * src_stride); 85da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out0 = __msa_copy_u_d((v2i64)src0, 0); 86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian out1 = __msa_copy_u_d((v2i64)src1, 0); 87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SD(out0, dst); 89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SD(out1, dst); 91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 92da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 93da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 94da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, 97da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, int32_t dst_stride, 98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t height, int32_t width) { 99da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t cnt, loop_cnt; 100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian const uint8_t *src_tmp; 101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst_tmp; 102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = (width >> 4); cnt--;) { 105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src_tmp = src; 106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst_tmp = dst; 107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (loop_cnt = (height >> 3); loop_cnt--;) { 1097bc9febe8749e98a3812a0dc4380ceae75c29450Johann LD_UB8(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6, 1107bc9febe8749e98a3812a0dc4380ceae75c29450Johann src7); 111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src_tmp += (8 * src_stride); 112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst_tmp, 1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann dst_stride); 115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst_tmp += (8 * dst_stride); 116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += 16; 119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += 16; 120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_width16_msa(const uint8_t *src, int32_t src_stride, 124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, int32_t dst_stride, int32_t height) { 125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t cnt; 126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (0 == height % 12) { 129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = (height / 12); cnt--;) { 130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); 131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (8 * src_stride); 132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride); 133da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (8 * dst_stride); 134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src0, src1, src2, src3); 136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src0, src1, src2, src3, dst, dst_stride); 138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 139da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 140da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (0 == height % 8) { 141da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); 142da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (0 == height % 4) { 143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = (height >> 2); cnt--;) { 144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src0, src1, src2, src3); 145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src0, src1, src2, src3, dst, dst_stride); 148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_width32_msa(const uint8_t *src, int32_t src_stride, 154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, int32_t dst_stride, int32_t height) { 155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t cnt; 156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian v16u8 src0, src1, src2, src3, src4, src5, src6, src7; 157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (0 == height % 12) { 159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = (height / 12); cnt--;) { 160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src0, src1, src2, src3); 161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src + 16, src_stride, src4, src5, src6, src7); 162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src0, src1, src2, src3, dst, dst_stride); 164da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); 165da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src0, src1, src2, src3); 168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src + 16, src_stride, src4, src5, src6, src7); 169da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src0, src1, src2, src3, dst, dst_stride); 171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); 172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 173da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src0, src1, src2, src3); 175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src + 16, src_stride, src4, src5, src6, src7); 176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src0, src1, src2, src3, dst, dst_stride); 178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); 179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (0 == height % 8) { 182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32); 183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } else if (0 == height % 4) { 184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = (height >> 2); cnt--;) { 185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src, src_stride, src0, src1, src2, src3); 186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian LD_UB4(src + 16, src_stride, src4, src5, src6, src7); 187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += (4 * src_stride); 188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src0, src1, src2, src3, dst, dst_stride); 189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); 190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += (4 * dst_stride); 191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 195da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_width64_msa(const uint8_t *src, int32_t src_stride, 196da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, int32_t dst_stride, int32_t height) { 197da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64); 198da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride, 201da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *dst, ptrdiff_t dst_stride, 202df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const InterpKernel *filter, int x0_q4, 203df37111358d02836cb29bbcb9c6e4c95dff90a16Johann int32_t x_step_q4, int y0_q4, int32_t y_step_q4, 204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian int32_t w, int32_t h) { 205df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)filter; 206df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)x0_q4; 207df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)x_step_q4; 208df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)y0_q4; 209df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)y_step_q4; 210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian switch (w) { 212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 4: { 213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t cnt, tmp; 214da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* 1 word storage */ 215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = h; cnt--;) { 216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian tmp = LW(src); 217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian SW(tmp, dst); 218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 8: { 224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian copy_width8_msa(src, src_stride, dst, dst_stride, h); 225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 16: { 228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian copy_width16_msa(src, src_stride, dst, dst_stride, h); 229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 32: { 232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian copy_width32_msa(src, src_stride, dst, dst_stride, h); 233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 64: { 236da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian copy_width64_msa(src, src_stride, dst, dst_stride, h); 237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 239da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian default: { 240da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t cnt; 241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian for (cnt = h; cnt--;) { 242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian memcpy(dst, src, w); 243da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src += src_stride; 244da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian dst += dst_stride; 245da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 246da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian } 249da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian} 250