/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
11da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include <string.h>
12df37111358d02836cb29bbcb9c6e4c95dff90a16Johann#include "./vpx_dsp_rtcd.h"
13da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/mips/macros_msa.h"
14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_width8_msa(const uint8_t *src, int32_t src_stride,
16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                            uint8_t *dst, int32_t dst_stride, int32_t height) {
17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t cnt;
18da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
19da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
20da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
21da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  if (0 == height % 12) {
22da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (cnt = (height / 12); cnt--;) {
23da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
24da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (8 * src_stride);
25da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out0 = __msa_copy_u_d((v2i64)src0, 0);
27da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out1 = __msa_copy_u_d((v2i64)src1, 0);
28da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out2 = __msa_copy_u_d((v2i64)src2, 0);
29da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out3 = __msa_copy_u_d((v2i64)src3, 0);
30da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out4 = __msa_copy_u_d((v2i64)src4, 0);
31da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out5 = __msa_copy_u_d((v2i64)src5, 0);
32da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out6 = __msa_copy_u_d((v2i64)src6, 0);
33da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out7 = __msa_copy_u_d((v2i64)src7, 0);
34da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
35da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      SD4(out0, out1, out2, out3, dst, dst_stride);
36da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
37da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      SD4(out4, out5, out6, out7, dst, dst_stride);
38da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
39da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src, src_stride, src0, src1, src2, src3);
41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (4 * src_stride);
42da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out0 = __msa_copy_u_d((v2i64)src0, 0);
44da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out1 = __msa_copy_u_d((v2i64)src1, 0);
45da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out2 = __msa_copy_u_d((v2i64)src2, 0);
46da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out3 = __msa_copy_u_d((v2i64)src3, 0);
47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      SD4(out0, out1, out2, out3, dst, dst_stride);
48da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
49da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
50da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  } else if (0 == height % 8) {
51da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (cnt = height >> 3; cnt--;) {
52da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
53da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (8 * src_stride);
54da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
55da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out0 = __msa_copy_u_d((v2i64)src0, 0);
56da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out1 = __msa_copy_u_d((v2i64)src1, 0);
57da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out2 = __msa_copy_u_d((v2i64)src2, 0);
58da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out3 = __msa_copy_u_d((v2i64)src3, 0);
59da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out4 = __msa_copy_u_d((v2i64)src4, 0);
60da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out5 = __msa_copy_u_d((v2i64)src5, 0);
61da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out6 = __msa_copy_u_d((v2i64)src6, 0);
62da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out7 = __msa_copy_u_d((v2i64)src7, 0);
63da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
64da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      SD4(out0, out1, out2, out3, dst, dst_stride);
65da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
66da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      SD4(out4, out5, out6, out7, dst, dst_stride);
67da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
68da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
69da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  } else if (0 == height % 4) {
70da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (cnt = (height / 4); cnt--;) {
71da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src, src_stride, src0, src1, src2, src3);
72da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (4 * src_stride);
73da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out0 = __msa_copy_u_d((v2i64)src0, 0);
74da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out1 = __msa_copy_u_d((v2i64)src1, 0);
75da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out2 = __msa_copy_u_d((v2i64)src2, 0);
76da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out3 = __msa_copy_u_d((v2i64)src3, 0);
77da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
78da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      SD4(out0, out1, out2, out3, dst, dst_stride);
79da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
80da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
81da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  } else if (0 == height % 2) {
82da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (cnt = (height / 2); cnt--;) {
83da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB2(src, src_stride, src0, src1);
84da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (2 * src_stride);
85da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out0 = __msa_copy_u_d((v2i64)src0, 0);
86da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      out1 = __msa_copy_u_d((v2i64)src1, 0);
87da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
88da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      SD(out0, dst);
89da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += dst_stride;
90da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      SD(out1, dst);
91da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += dst_stride;
92da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
93da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
94da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
95da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
96da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
97da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                                  uint8_t *dst, int32_t dst_stride,
98da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                                  int32_t height, int32_t width) {
99da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t cnt, loop_cnt;
100da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  const uint8_t *src_tmp;
101da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  uint8_t *dst_tmp;
102da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
103da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
104da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  for (cnt = (width >> 4); cnt--;) {
105da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    src_tmp = src;
106da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    dst_tmp = dst;
107da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
108da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (loop_cnt = (height >> 3); loop_cnt--;) {
1097bc9febe8749e98a3812a0dc4380ceae75c29450Johann      LD_UB8(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6,
1107bc9febe8749e98a3812a0dc4380ceae75c29450Johann             src7);
111da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src_tmp += (8 * src_stride);
112da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann      ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst_tmp,
1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann             dst_stride);
115da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst_tmp += (8 * dst_stride);
116da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
118da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    src += 16;
119da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    dst += 16;
120da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
121da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
122da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
123da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_width16_msa(const uint8_t *src, int32_t src_stride,
124da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                             uint8_t *dst, int32_t dst_stride, int32_t height) {
125da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t cnt;
126da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
127da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
128da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  if (0 == height % 12) {
129da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (cnt = (height / 12); cnt--;) {
130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (8 * src_stride);
132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
133da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (8 * dst_stride);
134da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
135da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src, src_stride, src0, src1, src2, src3);
136da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (4 * src_stride);
137da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src0, src1, src2, src3, dst, dst_stride);
138da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
139da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
140da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  } else if (0 == height % 8) {
141da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
142da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  } else if (0 == height % 4) {
143da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (cnt = (height >> 2); cnt--;) {
144da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src, src_stride, src0, src1, src2, src3);
145da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (4 * src_stride);
146da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src0, src1, src2, src3, dst, dst_stride);
148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
150da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
151da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
152da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
153da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianstatic void copy_width32_msa(const uint8_t *src, int32_t src_stride,
154da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                             uint8_t *dst, int32_t dst_stride, int32_t height) {
155da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  int32_t cnt;
156da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
157da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
158da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  if (0 == height % 12) {
159da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (cnt = (height / 12); cnt--;) {
160da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src, src_stride, src0, src1, src2, src3);
161da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
162da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (4 * src_stride);
163da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src0, src1, src2, src3, dst, dst_stride);
164da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
165da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
166da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
167da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src, src_stride, src0, src1, src2, src3);
168da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
169da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (4 * src_stride);
170da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src0, src1, src2, src3, dst, dst_stride);
171da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
172da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
173da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
174da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src, src_stride, src0, src1, src2, src3);
175da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
176da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (4 * src_stride);
177da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src0, src1, src2, src3, dst, dst_stride);
178da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
179da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
180da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
181da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  } else if (0 == height % 8) {
182da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32);
183da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  } else if (0 == height % 4) {
184da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    for (cnt = (height >> 2); cnt--;) {
185da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src, src_stride, src0, src1, src2, src3);
186da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
187da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      src += (4 * src_stride);
188da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src0, src1, src2, src3, dst, dst_stride);
189da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
190da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      dst += (4 * dst_stride);
191da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
192da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
193da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
194da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
/* Copies a 64-byte-wide block of `height` rows from `src` to `dst`.
 *
 * src/src_stride: first source row and the byte distance between rows.
 * dst/dst_stride: first destination row and the byte distance between rows.
 * height:         number of rows to copy; values <= 0 copy nothing.
 *
 * copy_16multx8mult_msa only moves whole groups of eight rows (it iterates
 * `height >> 3` times), so it is given the multiple-of-8 prefix and the
 * remaining rows (at most seven) are copied one by one with memcpy.
 */
static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
                             uint8_t *dst, int32_t dst_stride, int32_t height) {
  int32_t bulk_rows, rows_left;

  if (height <= 0) return;

  bulk_rows = height & ~7;
  if (bulk_rows > 0) {
    copy_16multx8mult_msa(src, src_stride, dst, dst_stride, bulk_rows, 64);
    src += (ptrdiff_t)bulk_rows * src_stride;
    dst += (ptrdiff_t)bulk_rows * dst_stride;
  }

  /* Rows the eight-row helper cannot cover. */
  for (rows_left = height - bulk_rows; rows_left > 0; --rows_left) {
    memcpy(dst, src, 64);
    src += src_stride;
    dst += dst_stride;
  }
}
199da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
200da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
201da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                           uint8_t *dst, ptrdiff_t dst_stride,
202df37111358d02836cb29bbcb9c6e4c95dff90a16Johann                           const InterpKernel *filter, int x0_q4,
203df37111358d02836cb29bbcb9c6e4c95dff90a16Johann                           int32_t x_step_q4, int y0_q4, int32_t y_step_q4,
204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian                           int32_t w, int32_t h) {
205df37111358d02836cb29bbcb9c6e4c95dff90a16Johann  (void)filter;
206df37111358d02836cb29bbcb9c6e4c95dff90a16Johann  (void)x0_q4;
207df37111358d02836cb29bbcb9c6e4c95dff90a16Johann  (void)x_step_q4;
208df37111358d02836cb29bbcb9c6e4c95dff90a16Johann  (void)y0_q4;
209df37111358d02836cb29bbcb9c6e4c95dff90a16Johann  (void)y_step_q4;
210da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian
211da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  switch (w) {
212da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    case 4: {
213da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      uint32_t cnt, tmp;
214da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      /* 1 word storage */
215da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      for (cnt = h; cnt--;) {
216da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        tmp = LW(src);
217da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        SW(tmp, dst);
218da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        src += src_stride;
219da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        dst += dst_stride;
220da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      }
221da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      break;
222da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
223da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    case 8: {
224da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      copy_width8_msa(src, src_stride, dst, dst_stride, h);
225da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      break;
226da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
227da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    case 16: {
228da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      copy_width16_msa(src, src_stride, dst, dst_stride, h);
229da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      break;
230da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
231da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    case 32: {
232da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      copy_width32_msa(src, src_stride, dst, dst_stride, h);
233da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      break;
234da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
235da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    case 64: {
236da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      copy_width64_msa(src, src_stride, dst, dst_stride, h);
237da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      break;
238da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
239da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    default: {
240da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      uint32_t cnt;
241da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      for (cnt = h; cnt--;) {
242da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        memcpy(dst, src, w);
243da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        src += src_stride;
244da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian        dst += dst_stride;
245da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      }
246da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian      break;
247da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian    }
248da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian  }
249da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian}
250