/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/asmdefs_mmi.h"

16void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
17                            ptrdiff_t diff_stride, const uint8_t *src,
18                            ptrdiff_t src_stride, const uint8_t *pred,
19                            ptrdiff_t pred_stride) {
20  double ftmp[13];
21  uint32_t tmp[1];
22
23  if (rows == cols) {
24    switch (rows) {
25      case 4:
26        __asm__ volatile(
27            "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
28#if _MIPS_SIM == _ABIO32
29            "ulw        %[tmp0],    0x00(%[src])                        \n\t"
30            "mtc1       %[tmp0],    %[ftmp1]                            \n\t"
31            "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
32            "mtc1       %[tmp0],    %[ftmp2]                            \n\t"
33#else
34            "gslwlc1    %[ftmp1],   0x03(%[src])                        \n\t"
35            "gslwrc1    %[ftmp1],   0x00(%[src])                        \n\t"
36            "gslwlc1    %[ftmp2],   0x03(%[pred])                       \n\t"
37            "gslwrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
38#endif
39            MMI_ADDU(%[src], %[src], %[src_stride])
40            MMI_ADDU(%[pred], %[pred], %[pred_stride])
41
42#if _MIPS_SIM == _ABIO32
43            "ulw        %[tmp0],    0x00(%[src])                        \n\t"
44            "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
45            "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
46            "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
47#else
48            "gslwlc1    %[ftmp3],   0x03(%[src])                        \n\t"
49            "gslwrc1    %[ftmp3],   0x00(%[src])                        \n\t"
50            "gslwlc1    %[ftmp4],   0x03(%[pred])                       \n\t"
51            "gslwrc1    %[ftmp4],   0x00(%[pred])                       \n\t"
52#endif
53            MMI_ADDU(%[src], %[src], %[src_stride])
54            MMI_ADDU(%[pred], %[pred], %[pred_stride])
55
56#if _MIPS_SIM == _ABIO32
57            "ulw        %[tmp0],    0x00(%[src])                        \n\t"
58            "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
59            "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
60            "mtc1       %[tmp0],    %[ftmp6]                            \n\t"
61#else
62            "gslwlc1    %[ftmp5],   0x03(%[src])                        \n\t"
63            "gslwrc1    %[ftmp5],   0x00(%[src])                        \n\t"
64            "gslwlc1    %[ftmp6],   0x03(%[pred])                       \n\t"
65            "gslwrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
66#endif
67            MMI_ADDU(%[src], %[src], %[src_stride])
68            MMI_ADDU(%[pred], %[pred], %[pred_stride])
69
70#if _MIPS_SIM == _ABIO32
71            "ulw        %[tmp0],    0x00(%[src])                        \n\t"
72            "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
73            "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
74            "mtc1       %[tmp0],    %[ftmp8]                            \n\t"
75#else
76            "gslwlc1    %[ftmp7],   0x03(%[src])                        \n\t"
77            "gslwrc1    %[ftmp7],   0x00(%[src])                        \n\t"
78            "gslwlc1    %[ftmp8],   0x03(%[pred])                       \n\t"
79            "gslwrc1    %[ftmp8],   0x00(%[pred])                       \n\t"
80#endif
81            "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
82            "punpcklbh  %[ftmp10],  %[ftmp2],           %[ftmp0]        \n\t"
83            "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
84            "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
85            "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
86            MMI_ADDU(%[diff], %[diff], %[diff_stride])
87            "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
88            "punpcklbh  %[ftmp10],  %[ftmp4],           %[ftmp0]        \n\t"
89            "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
90            "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
91            "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
92            MMI_ADDU(%[diff], %[diff], %[diff_stride])
93            "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
94            "punpcklbh  %[ftmp10],  %[ftmp6],           %[ftmp0]        \n\t"
95            "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
96            "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
97            "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
98            MMI_ADDU(%[diff], %[diff], %[diff_stride])
99            "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
100            "punpcklbh  %[ftmp10],  %[ftmp8],           %[ftmp0]        \n\t"
101            "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
102            "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
103            "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
104            : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
105              [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
106              [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
107              [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
108              [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
109              [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
110#if _MIPS_SIM == _ABIO32
111              [tmp0] "=&r"(tmp[0]),
112#endif
113              [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff)
114            : [src_stride] "r"((mips_reg)src_stride),
115              [pred_stride] "r"((mips_reg)pred_stride),
116              [diff_stride] "r"((mips_reg)(diff_stride * 2))
117            : "memory");
118        break;
119      case 8:
120        __asm__ volatile(
121            "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
122            "li         %[tmp0],    0x02                                \n\t"
123            "1:                                                         \n\t"
124            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
125            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
126            "gsldlc1    %[ftmp2],   0x07(%[pred])                       \n\t"
127            "gsldrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
128            MMI_ADDU(%[src], %[src], %[src_stride])
129            MMI_ADDU(%[pred], %[pred], %[pred_stride])
130            "gsldlc1    %[ftmp3],   0x07(%[src])                        \n\t"
131            "gsldrc1    %[ftmp3],   0x00(%[src])                        \n\t"
132            "gsldlc1    %[ftmp4],   0x07(%[pred])                       \n\t"
133            "gsldrc1    %[ftmp4],   0x00(%[pred])                       \n\t"
134            MMI_ADDU(%[src], %[src], %[src_stride])
135            MMI_ADDU(%[pred], %[pred], %[pred_stride])
136            "gsldlc1    %[ftmp5],   0x07(%[src])                        \n\t"
137            "gsldrc1    %[ftmp5],   0x00(%[src])                        \n\t"
138            "gsldlc1    %[ftmp6],   0x07(%[pred])                       \n\t"
139            "gsldrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
140            MMI_ADDU(%[src], %[src], %[src_stride])
141            MMI_ADDU(%[pred], %[pred], %[pred_stride])
142            "gsldlc1    %[ftmp7],   0x07(%[src])                        \n\t"
143            "gsldrc1    %[ftmp7],   0x00(%[src])                        \n\t"
144            "gsldlc1    %[ftmp8],   0x07(%[pred])                       \n\t"
145            "gsldrc1    %[ftmp8],   0x00(%[pred])                       \n\t"
146            MMI_ADDU(%[src], %[src], %[src_stride])
147            MMI_ADDU(%[pred], %[pred], %[pred_stride])
148            "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
149            "punpckhbh  %[ftmp10],  %[ftmp1],           %[ftmp0]        \n\t"
150            "punpcklbh  %[ftmp11],  %[ftmp2],           %[ftmp0]        \n\t"
151            "punpckhbh  %[ftmp12],  %[ftmp2],           %[ftmp0]        \n\t"
152            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
153            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
154            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
155            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
156            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
157            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
158            MMI_ADDU(%[diff], %[diff], %[diff_stride])
159            "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
160            "punpckhbh  %[ftmp10],  %[ftmp3],           %[ftmp0]        \n\t"
161            "punpcklbh  %[ftmp11],  %[ftmp4],           %[ftmp0]        \n\t"
162            "punpckhbh  %[ftmp12],  %[ftmp4],           %[ftmp0]        \n\t"
163            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
164            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
165            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
166            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
167            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
168            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
169            MMI_ADDU(%[diff], %[diff], %[diff_stride])
170            "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
171            "punpckhbh  %[ftmp10],  %[ftmp5],           %[ftmp0]        \n\t"
172            "punpcklbh  %[ftmp11],  %[ftmp6],           %[ftmp0]        \n\t"
173            "punpckhbh  %[ftmp12],  %[ftmp6],           %[ftmp0]        \n\t"
174            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
175            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
176            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
177            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
178            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
179            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
180            MMI_ADDU(%[diff], %[diff], %[diff_stride])
181            "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
182            "punpckhbh  %[ftmp10],  %[ftmp7],           %[ftmp0]        \n\t"
183            "punpcklbh  %[ftmp11],  %[ftmp8],           %[ftmp0]        \n\t"
184            "punpckhbh  %[ftmp12],  %[ftmp8],           %[ftmp0]        \n\t"
185            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
186            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
187            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
188            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
189            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
190            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
191            MMI_ADDU(%[diff], %[diff], %[diff_stride])
192            "addiu      %[tmp0],    %[tmp0],            -0x01           \n\t"
193            "bnez       %[tmp0],    1b                                  \n\t"
194            : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
195              [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
196              [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
197              [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
198              [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
199              [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
200              [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
201              [pred] "+&r"(pred), [diff] "+&r"(diff)
202            : [pred_stride] "r"((mips_reg)pred_stride),
203              [src_stride] "r"((mips_reg)src_stride),
204              [diff_stride] "r"((mips_reg)(diff_stride * 2))
205            : "memory");
206        break;
207      case 16:
208        __asm__ volatile(
209            "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
210            "li         %[tmp0],    0x08                                \n\t"
211            "1:                                                         \n\t"
212            "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
213            "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
214            "gsldlc1    %[ftmp2],   0x07(%[pred])                       \n\t"
215            "gsldrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
216            "gsldlc1    %[ftmp3],   0x0f(%[src])                        \n\t"
217            "gsldrc1    %[ftmp3],   0x08(%[src])                        \n\t"
218            "gsldlc1    %[ftmp4],   0x0f(%[pred])                       \n\t"
219            "gsldrc1    %[ftmp4],   0x08(%[pred])                       \n\t"
220            MMI_ADDU(%[src], %[src], %[src_stride])
221            MMI_ADDU(%[pred], %[pred], %[pred_stride])
222            "gsldlc1    %[ftmp5],   0x07(%[src])                        \n\t"
223            "gsldrc1    %[ftmp5],   0x00(%[src])                        \n\t"
224            "gsldlc1    %[ftmp6],   0x07(%[pred])                       \n\t"
225            "gsldrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
226            "gsldlc1    %[ftmp7],   0x0f(%[src])                        \n\t"
227            "gsldrc1    %[ftmp7],   0x08(%[src])                        \n\t"
228            "gsldlc1    %[ftmp8],   0x0f(%[pred])                       \n\t"
229            "gsldrc1    %[ftmp8],   0x08(%[pred])                       \n\t"
230            MMI_ADDU(%[src], %[src], %[src_stride])
231            MMI_ADDU(%[pred], %[pred], %[pred_stride])
232            "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
233            "punpckhbh  %[ftmp10],  %[ftmp1],           %[ftmp0]        \n\t"
234            "punpcklbh  %[ftmp11],  %[ftmp2],           %[ftmp0]        \n\t"
235            "punpckhbh  %[ftmp12],  %[ftmp2],           %[ftmp0]        \n\t"
236            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
237            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
238            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
239            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
240            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
241            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
242            "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
243            "punpckhbh  %[ftmp10],  %[ftmp3],           %[ftmp0]        \n\t"
244            "punpcklbh  %[ftmp11],  %[ftmp4],           %[ftmp0]        \n\t"
245            "punpckhbh  %[ftmp12],  %[ftmp4],           %[ftmp0]        \n\t"
246            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
247            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
248            "gssdlc1    %[ftmp9],   0x17(%[diff])                       \n\t"
249            "gssdrc1    %[ftmp9],   0x10(%[diff])                       \n\t"
250            "gssdlc1    %[ftmp10],  0x1f(%[diff])                       \n\t"
251            "gssdrc1    %[ftmp10],  0x18(%[diff])                       \n\t"
252            MMI_ADDU(%[diff], %[diff], %[diff_stride])
253            "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
254            "punpckhbh  %[ftmp10],  %[ftmp5],           %[ftmp0]        \n\t"
255            "punpcklbh  %[ftmp11],  %[ftmp6],           %[ftmp0]        \n\t"
256            "punpckhbh  %[ftmp12],  %[ftmp6],           %[ftmp0]        \n\t"
257            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
258            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
259            "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
260            "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
261            "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
262            "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
263            "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
264            "punpckhbh  %[ftmp10],  %[ftmp7],           %[ftmp0]        \n\t"
265            "punpcklbh  %[ftmp11],  %[ftmp8],           %[ftmp0]        \n\t"
266            "punpckhbh  %[ftmp12],  %[ftmp8],           %[ftmp0]        \n\t"
267            "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
268            "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
269            "gssdlc1    %[ftmp9],   0x17(%[diff])                       \n\t"
270            "gssdrc1    %[ftmp9],   0x10(%[diff])                       \n\t"
271            "gssdlc1    %[ftmp10],  0x1f(%[diff])                       \n\t"
272            "gssdrc1    %[ftmp10],  0x18(%[diff])                       \n\t"
273            MMI_ADDU(%[diff], %[diff], %[diff_stride])
274            "addiu      %[tmp0],    %[tmp0],            -0x01           \n\t"
275            "bnez       %[tmp0],    1b                                  \n\t"
276            : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
277              [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
278              [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
279              [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
280              [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
281              [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
282              [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
283              [pred] "+&r"(pred), [diff] "+&r"(diff)
284            : [pred_stride] "r"((mips_reg)pred_stride),
285              [src_stride] "r"((mips_reg)src_stride),
286              [diff_stride] "r"((mips_reg)(diff_stride * 2))
287            : "memory");
288        break;
289      case 32:
290        vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
291                             pred, pred_stride);
292        break;
293      case 64:
294        vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
295                             pred, pred_stride);
296        break;
297      default:
298        vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
299                             pred, pred_stride);
300        break;
301    }
302  } else {
303    vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred,
304                         pred_stride);
305  }
306}