1/*
2 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "vpx_dsp/mips/common_dspr2.h"
12
13#if HAVE_DSPR2
14void vpx_h_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride,
15                                 const uint8_t *above, const uint8_t *left) {
16  int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
17  int32_t tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
18  (void)above;
19
20  __asm__ __volatile__(
21      "lb         %[tmp1],      (%[left])                    \n\t"
22      "lb         %[tmp2],      1(%[left])                   \n\t"
23      "lb         %[tmp3],      2(%[left])                   \n\t"
24      "lb         %[tmp4],      3(%[left])                   \n\t"
25      "lb         %[tmp5],      4(%[left])                   \n\t"
26      "lb         %[tmp6],      5(%[left])                   \n\t"
27      "lb         %[tmp7],      6(%[left])                   \n\t"
28      "lb         %[tmp8],      7(%[left])                   \n\t"
29      "lb         %[tmp9],      8(%[left])                   \n\t"
30      "lb         %[tmp10],     9(%[left])                   \n\t"
31      "lb         %[tmp11],     10(%[left])                  \n\t"
32      "lb         %[tmp12],     11(%[left])                  \n\t"
33      "lb         %[tmp13],     12(%[left])                  \n\t"
34      "lb         %[tmp14],     13(%[left])                  \n\t"
35      "lb         %[tmp15],     14(%[left])                  \n\t"
36      "lb         %[tmp16],     15(%[left])                  \n\t"
37
38      "replv.qb   %[tmp1],      %[tmp1]                      \n\t"
39      "replv.qb   %[tmp2],      %[tmp2]                      \n\t"
40      "replv.qb   %[tmp3],      %[tmp3]                      \n\t"
41      "replv.qb   %[tmp4],      %[tmp4]                      \n\t"
42      "replv.qb   %[tmp5],      %[tmp5]                      \n\t"
43      "replv.qb   %[tmp6],      %[tmp6]                      \n\t"
44      "replv.qb   %[tmp7],      %[tmp7]                      \n\t"
45      "replv.qb   %[tmp8],      %[tmp8]                      \n\t"
46      "replv.qb   %[tmp9],      %[tmp9]                      \n\t"
47      "replv.qb   %[tmp10],     %[tmp10]                     \n\t"
48      "replv.qb   %[tmp11],     %[tmp11]                     \n\t"
49      "replv.qb   %[tmp12],     %[tmp12]                     \n\t"
50      "replv.qb   %[tmp13],     %[tmp13]                     \n\t"
51      "replv.qb   %[tmp14],     %[tmp14]                     \n\t"
52      "replv.qb   %[tmp15],     %[tmp15]                     \n\t"
53      "replv.qb   %[tmp16],     %[tmp16]                     \n\t"
54
55      "sw         %[tmp1],      (%[dst])                     \n\t"
56      "sw         %[tmp1],      4(%[dst])                    \n\t"
57      "sw         %[tmp1],      8(%[dst])                    \n\t"
58      "sw         %[tmp1],      12(%[dst])                   \n\t"
59
60      "add        %[dst],       %[dst],         %[stride]    \n\t"
61      "sw         %[tmp2],      (%[dst])                     \n\t"
62      "sw         %[tmp2],      4(%[dst])                    \n\t"
63      "sw         %[tmp2],      8(%[dst])                    \n\t"
64      "sw         %[tmp2],      12(%[dst])                   \n\t"
65
66      "add        %[dst],       %[dst],         %[stride]    \n\t"
67      "sw         %[tmp3],      (%[dst])                     \n\t"
68      "sw         %[tmp3],      4(%[dst])                    \n\t"
69      "sw         %[tmp3],      8(%[dst])                    \n\t"
70      "sw         %[tmp3],      12(%[dst])                   \n\t"
71
72      "add        %[dst],       %[dst],         %[stride]    \n\t"
73      "sw         %[tmp4],      (%[dst])                     \n\t"
74      "sw         %[tmp4],      4(%[dst])                    \n\t"
75      "sw         %[tmp4],      8(%[dst])                    \n\t"
76      "sw         %[tmp4],      12(%[dst])                   \n\t"
77
78      "add        %[dst],       %[dst],         %[stride]    \n\t"
79      "sw         %[tmp5],      (%[dst])                     \n\t"
80      "sw         %[tmp5],      4(%[dst])                    \n\t"
81      "sw         %[tmp5],      8(%[dst])                    \n\t"
82      "sw         %[tmp5],      12(%[dst])                   \n\t"
83
84      "add        %[dst],       %[dst],         %[stride]    \n\t"
85      "sw         %[tmp6],      (%[dst])                     \n\t"
86      "sw         %[tmp6],      4(%[dst])                    \n\t"
87      "sw         %[tmp6],      8(%[dst])                    \n\t"
88      "sw         %[tmp6],      12(%[dst])                   \n\t"
89
90      "add        %[dst],       %[dst],         %[stride]    \n\t"
91      "sw         %[tmp7],      (%[dst])                     \n\t"
92      "sw         %[tmp7],      4(%[dst])                    \n\t"
93      "sw         %[tmp7],      8(%[dst])                    \n\t"
94      "sw         %[tmp7],      12(%[dst])                   \n\t"
95
96      "add        %[dst],       %[dst],         %[stride]    \n\t"
97      "sw         %[tmp8],      (%[dst])                     \n\t"
98      "sw         %[tmp8],      4(%[dst])                    \n\t"
99      "sw         %[tmp8],      8(%[dst])                    \n\t"
100      "sw         %[tmp8],      12(%[dst])                   \n\t"
101
102      "add        %[dst],       %[dst],         %[stride]    \n\t"
103      "sw         %[tmp9],      (%[dst])                     \n\t"
104      "sw         %[tmp9],      4(%[dst])                    \n\t"
105      "sw         %[tmp9],      8(%[dst])                    \n\t"
106      "sw         %[tmp9],      12(%[dst])                   \n\t"
107
108      "add        %[dst],       %[dst],         %[stride]    \n\t"
109      "sw         %[tmp10],     (%[dst])                     \n\t"
110      "sw         %[tmp10],     4(%[dst])                    \n\t"
111      "sw         %[tmp10],     8(%[dst])                    \n\t"
112      "sw         %[tmp10],     12(%[dst])                   \n\t"
113
114      "add        %[dst],       %[dst],         %[stride]    \n\t"
115      "sw         %[tmp11],     (%[dst])                     \n\t"
116      "sw         %[tmp11],     4(%[dst])                    \n\t"
117      "sw         %[tmp11],     8(%[dst])                    \n\t"
118      "sw         %[tmp11],     12(%[dst])                   \n\t"
119
120      "add        %[dst],       %[dst],         %[stride]    \n\t"
121      "sw         %[tmp12],     (%[dst])                     \n\t"
122      "sw         %[tmp12],     4(%[dst])                    \n\t"
123      "sw         %[tmp12],     8(%[dst])                    \n\t"
124      "sw         %[tmp12],     12(%[dst])                   \n\t"
125
126      "add        %[dst],       %[dst],         %[stride]    \n\t"
127      "sw         %[tmp13],     (%[dst])                     \n\t"
128      "sw         %[tmp13],     4(%[dst])                    \n\t"
129      "sw         %[tmp13],     8(%[dst])                    \n\t"
130      "sw         %[tmp13],     12(%[dst])                   \n\t"
131
132      "add        %[dst],       %[dst],         %[stride]    \n\t"
133      "sw         %[tmp14],     (%[dst])                     \n\t"
134      "sw         %[tmp14],     4(%[dst])                    \n\t"
135      "sw         %[tmp14],     8(%[dst])                    \n\t"
136      "sw         %[tmp14],     12(%[dst])                   \n\t"
137
138      "add        %[dst],       %[dst],         %[stride]    \n\t"
139      "sw         %[tmp15],     (%[dst])                     \n\t"
140      "sw         %[tmp15],     4(%[dst])                    \n\t"
141      "sw         %[tmp15],     8(%[dst])                    \n\t"
142      "sw         %[tmp15],     12(%[dst])                   \n\t"
143
144      "add        %[dst],       %[dst],         %[stride]    \n\t"
145      "sw         %[tmp16],     (%[dst])                     \n\t"
146      "sw         %[tmp16],     4(%[dst])                    \n\t"
147      "sw         %[tmp16],     8(%[dst])                    \n\t"
148      "sw         %[tmp16],     12(%[dst])                   \n\t"
149
150      : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
151        [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7),
152        [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8), [tmp9] "=&r"(tmp9),
153        [tmp10] "=&r"(tmp10), [tmp11] "=&r"(tmp11), [tmp12] "=&r"(tmp12),
154        [tmp13] "=&r"(tmp13), [tmp14] "=&r"(tmp14), [tmp15] "=&r"(tmp15),
155        [tmp16] "=&r"(tmp16)
156      : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
157}
158
159void vpx_dc_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride,
160                                  const uint8_t *above, const uint8_t *left) {
161  int32_t expected_dc;
162  int32_t average;
163  int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
164  int32_t above2, left2;
165
166  __asm__ __volatile__(
167      "lw              %[above1],           (%[above])                    \n\t"
168      "lw              %[above2],           4(%[above])                   \n\t"
169      "lw              %[left1],            (%[left])                     \n\t"
170      "lw              %[left2],            4(%[left])                    \n\t"
171
172      "preceu.ph.qbl   %[above_l1],         %[above1]                     \n\t"
173      "preceu.ph.qbr   %[above_r1],         %[above1]                     \n\t"
174      "preceu.ph.qbl   %[left_l1],          %[left1]                      \n\t"
175      "preceu.ph.qbr   %[left_r1],          %[left1]                      \n\t"
176
177      "addu.ph         %[average],          %[above_r1],     %[above_l1]  \n\t"
178      "addu.ph         %[average],          %[average],      %[left_l1]   \n\t"
179      "addu.ph         %[average],          %[average],      %[left_r1]   \n\t"
180
181      "preceu.ph.qbl   %[above_l1],         %[above2]                     \n\t"
182      "preceu.ph.qbr   %[above_r1],         %[above2]                     \n\t"
183      "preceu.ph.qbl   %[left_l1],          %[left2]                      \n\t"
184      "preceu.ph.qbr   %[left_r1],          %[left2]                      \n\t"
185
186      "addu.ph         %[average],          %[average],      %[above_l1]  \n\t"
187      "addu.ph         %[average],          %[average],      %[above_r1]  \n\t"
188      "addu.ph         %[average],          %[average],      %[left_l1]   \n\t"
189      "addu.ph         %[average],          %[average],      %[left_r1]   \n\t"
190
191      "lw              %[above1],           8(%[above])                   \n\t"
192      "lw              %[above2],           12(%[above])                  \n\t"
193      "lw              %[left1],            8(%[left])                    \n\t"
194      "lw              %[left2],            12(%[left])                   \n\t"
195
196      "preceu.ph.qbl   %[above_l1],         %[above1]                     \n\t"
197      "preceu.ph.qbr   %[above_r1],         %[above1]                     \n\t"
198      "preceu.ph.qbl   %[left_l1],          %[left1]                      \n\t"
199      "preceu.ph.qbr   %[left_r1],          %[left1]                      \n\t"
200
201      "addu.ph         %[average],          %[average],      %[above_l1]  \n\t"
202      "addu.ph         %[average],          %[average],      %[above_r1]  \n\t"
203      "addu.ph         %[average],          %[average],      %[left_l1]   \n\t"
204      "addu.ph         %[average],          %[average],      %[left_r1]   \n\t"
205
206      "preceu.ph.qbl   %[above_l1],         %[above2]                     \n\t"
207      "preceu.ph.qbr   %[above_r1],         %[above2]                     \n\t"
208      "preceu.ph.qbl   %[left_l1],          %[left2]                      \n\t"
209      "preceu.ph.qbr   %[left_r1],          %[left2]                      \n\t"
210
211      "addu.ph         %[average],          %[average],      %[above_l1]  \n\t"
212      "addu.ph         %[average],          %[average],      %[above_r1]  \n\t"
213      "addu.ph         %[average],          %[average],      %[left_l1]   \n\t"
214      "addu.ph         %[average],          %[average],      %[left_r1]   \n\t"
215
216      "addiu           %[average],          %[average],      16           \n\t"
217      "srl             %[tmp],              %[average],      16           \n\t"
218      "addu.ph         %[average],          %[tmp],          %[average]   \n\t"
219      "srl             %[expected_dc],      %[average],      5            \n\t"
220      "replv.qb        %[expected_dc],      %[expected_dc]                \n\t"
221
222      "sw              %[expected_dc],      (%[dst])                      \n\t"
223      "sw              %[expected_dc],      4(%[dst])                     \n\t"
224      "sw              %[expected_dc],      8(%[dst])                     \n\t"
225      "sw              %[expected_dc],      12(%[dst])                    \n\t"
226
227      "add             %[dst],              %[dst],          %[stride]    \n\t"
228      "sw              %[expected_dc],      (%[dst])                      \n\t"
229      "sw              %[expected_dc],      4(%[dst])                     \n\t"
230      "sw              %[expected_dc],      8(%[dst])                     \n\t"
231      "sw              %[expected_dc],      12(%[dst])                    \n\t"
232
233      "add             %[dst],              %[dst],          %[stride]    \n\t"
234      "sw              %[expected_dc],      (%[dst])                      \n\t"
235      "sw              %[expected_dc],      4(%[dst])                     \n\t"
236      "sw              %[expected_dc],      8(%[dst])                     \n\t"
237      "sw              %[expected_dc],      12(%[dst])                    \n\t"
238
239      "add             %[dst],              %[dst],          %[stride]    \n\t"
240      "sw              %[expected_dc],      (%[dst])                      \n\t"
241      "sw              %[expected_dc],      4(%[dst])                     \n\t"
242      "sw              %[expected_dc],      8(%[dst])                     \n\t"
243      "sw              %[expected_dc],      12(%[dst])                    \n\t"
244
245      "add             %[dst],              %[dst],          %[stride]    \n\t"
246      "sw              %[expected_dc],      (%[dst])                      \n\t"
247      "sw              %[expected_dc],      4(%[dst])                     \n\t"
248      "sw              %[expected_dc],      8(%[dst])                     \n\t"
249      "sw              %[expected_dc],      12(%[dst])                    \n\t"
250
251      "add             %[dst],              %[dst],          %[stride]    \n\t"
252      "sw              %[expected_dc],      (%[dst])                      \n\t"
253      "sw              %[expected_dc],      4(%[dst])                     \n\t"
254      "sw              %[expected_dc],      8(%[dst])                     \n\t"
255      "sw              %[expected_dc],      12(%[dst])                    \n\t"
256
257      "add             %[dst],              %[dst],          %[stride]    \n\t"
258      "sw              %[expected_dc],      (%[dst])                      \n\t"
259      "sw              %[expected_dc],      4(%[dst])                     \n\t"
260      "sw              %[expected_dc],      8(%[dst])                     \n\t"
261      "sw              %[expected_dc],      12(%[dst])                    \n\t"
262
263      "add             %[dst],              %[dst],          %[stride]    \n\t"
264      "sw              %[expected_dc],      (%[dst])                      \n\t"
265      "sw              %[expected_dc],      4(%[dst])                     \n\t"
266      "sw              %[expected_dc],      8(%[dst])                     \n\t"
267      "sw              %[expected_dc],      12(%[dst])                    \n\t"
268
269      "add             %[dst],              %[dst],          %[stride]    \n\t"
270      "sw              %[expected_dc],      (%[dst])                      \n\t"
271      "sw              %[expected_dc],      4(%[dst])                     \n\t"
272      "sw              %[expected_dc],      8(%[dst])                     \n\t"
273      "sw              %[expected_dc],      12(%[dst])                    \n\t"
274
275      "add             %[dst],              %[dst],          %[stride]    \n\t"
276      "sw              %[expected_dc],      (%[dst])                      \n\t"
277      "sw              %[expected_dc],      4(%[dst])                     \n\t"
278      "sw              %[expected_dc],      8(%[dst])                     \n\t"
279      "sw              %[expected_dc],      12(%[dst])                    \n\t"
280
281      "add             %[dst],              %[dst],          %[stride]    \n\t"
282      "sw              %[expected_dc],      (%[dst])                      \n\t"
283      "sw              %[expected_dc],      4(%[dst])                     \n\t"
284      "sw              %[expected_dc],      8(%[dst])                     \n\t"
285      "sw              %[expected_dc],      12(%[dst])                    \n\t"
286
287      "add             %[dst],              %[dst],          %[stride]    \n\t"
288      "sw              %[expected_dc],      (%[dst])                      \n\t"
289      "sw              %[expected_dc],      4(%[dst])                     \n\t"
290      "sw              %[expected_dc],      8(%[dst])                     \n\t"
291      "sw              %[expected_dc],      12(%[dst])                    \n\t"
292
293      "add             %[dst],              %[dst],          %[stride]    \n\t"
294      "sw              %[expected_dc],      (%[dst])                      \n\t"
295      "sw              %[expected_dc],      4(%[dst])                     \n\t"
296      "sw              %[expected_dc],      8(%[dst])                     \n\t"
297      "sw              %[expected_dc],      12(%[dst])                    \n\t"
298
299      "add             %[dst],              %[dst],          %[stride]    \n\t"
300      "sw              %[expected_dc],      (%[dst])                      \n\t"
301      "sw              %[expected_dc],      4(%[dst])                     \n\t"
302      "sw              %[expected_dc],      8(%[dst])                     \n\t"
303      "sw              %[expected_dc],      12(%[dst])                    \n\t"
304
305      "add             %[dst],              %[dst],          %[stride]    \n\t"
306      "sw              %[expected_dc],      (%[dst])                      \n\t"
307      "sw              %[expected_dc],      4(%[dst])                     \n\t"
308      "sw              %[expected_dc],      8(%[dst])                     \n\t"
309      "sw              %[expected_dc],      12(%[dst])                    \n\t"
310
311      "add             %[dst],              %[dst],          %[stride]    \n\t"
312      "sw              %[expected_dc],      (%[dst])                      \n\t"
313      "sw              %[expected_dc],      4(%[dst])                     \n\t"
314      "sw              %[expected_dc],      8(%[dst])                     \n\t"
315      "sw              %[expected_dc],      12(%[dst])                    \n\t"
316
317      : [left1] "=&r"(left1), [above1] "=&r"(above1), [left_l1] "=&r"(left_l1),
318        [above_l1] "=&r"(above_l1), [left_r1] "=&r"(left_r1),
319        [above_r1] "=&r"(above_r1), [above2] "=&r"(above2),
320        [left2] "=&r"(left2), [average] "=&r"(average), [tmp] "=&r"(tmp),
321        [expected_dc] "=&r"(expected_dc)
322      : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
323        [stride] "r"(stride));
324}
325#endif  // #if HAVE_DSPR2
326