1/*
2 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "vpx_dsp/mips/common_dspr2.h"
12
13#if HAVE_DSPR2
14void vpx_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
15                               const uint8_t *above, const uint8_t *left) {
16  int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
17  (void)above;
18
19  __asm__ __volatile__(
20      "lb         %[tmp1],      (%[left])                   \n\t"
21      "lb         %[tmp2],      1(%[left])                  \n\t"
22      "lb         %[tmp3],      2(%[left])                  \n\t"
23      "lb         %[tmp4],      3(%[left])                  \n\t"
24      "lb         %[tmp5],      4(%[left])                  \n\t"
25      "lb         %[tmp6],      5(%[left])                  \n\t"
26      "lb         %[tmp7],      6(%[left])                  \n\t"
27      "lb         %[tmp8],      7(%[left])                  \n\t"
28
29      "replv.qb   %[tmp1],      %[tmp1]                     \n\t"
30      "replv.qb   %[tmp2],      %[tmp2]                     \n\t"
31      "replv.qb   %[tmp3],      %[tmp3]                     \n\t"
32      "replv.qb   %[tmp4],      %[tmp4]                     \n\t"
33      "replv.qb   %[tmp5],      %[tmp5]                     \n\t"
34      "replv.qb   %[tmp6],      %[tmp6]                     \n\t"
35      "replv.qb   %[tmp7],      %[tmp7]                     \n\t"
36      "replv.qb   %[tmp8],      %[tmp8]                     \n\t"
37
38      "sw         %[tmp1],      (%[dst])                    \n\t"
39      "sw         %[tmp1],      4(%[dst])                   \n\t"
40      "add        %[dst],       %[dst],         %[stride]   \n\t"
41      "sw         %[tmp2],      (%[dst])                    \n\t"
42      "sw         %[tmp2],      4(%[dst])                   \n\t"
43      "add        %[dst],       %[dst],         %[stride]   \n\t"
44      "sw         %[tmp3],      (%[dst])                    \n\t"
45      "sw         %[tmp3],      4(%[dst])                   \n\t"
46      "add        %[dst],       %[dst],         %[stride]   \n\t"
47      "sw         %[tmp4],      (%[dst])                    \n\t"
48      "sw         %[tmp4],      4(%[dst])                   \n\t"
49      "add        %[dst],       %[dst],         %[stride]   \n\t"
50      "sw         %[tmp5],      (%[dst])                    \n\t"
51      "sw         %[tmp5],      4(%[dst])                   \n\t"
52      "add        %[dst],       %[dst],         %[stride]   \n\t"
53      "sw         %[tmp6],      (%[dst])                    \n\t"
54      "sw         %[tmp6],      4(%[dst])                   \n\t"
55      "add        %[dst],       %[dst],         %[stride]   \n\t"
56      "sw         %[tmp7],      (%[dst])                    \n\t"
57      "sw         %[tmp7],      4(%[dst])                   \n\t"
58      "add        %[dst],       %[dst],         %[stride]   \n\t"
59      "sw         %[tmp8],      (%[dst])                    \n\t"
60      "sw         %[tmp8],      4(%[dst])                   \n\t"
61
62      : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
63        [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7),
64        [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8)
65      : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
66}
67
68void vpx_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
69                                const uint8_t *above, const uint8_t *left) {
70  int32_t expected_dc;
71  int32_t average;
72  int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
73  int32_t above2, above_l2, above_r2, left2, left_r2, left_l2;
74
75  __asm__ __volatile__(
76      "lw              %[above1],         (%[above])                      \n\t"
77      "lw              %[above2],         4(%[above])                     \n\t"
78      "lw              %[left1],          (%[left])                       \n\t"
79      "lw              %[left2],          4(%[left])                      \n\t"
80
81      "preceu.ph.qbl   %[above_l1],       %[above1]                       \n\t"
82      "preceu.ph.qbr   %[above_r1],       %[above1]                       \n\t"
83      "preceu.ph.qbl   %[left_l1],        %[left1]                        \n\t"
84      "preceu.ph.qbr   %[left_r1],        %[left1]                        \n\t"
85
86      "preceu.ph.qbl   %[above_l2],       %[above2]                       \n\t"
87      "preceu.ph.qbr   %[above_r2],       %[above2]                       \n\t"
88      "preceu.ph.qbl   %[left_l2],        %[left2]                        \n\t"
89      "preceu.ph.qbr   %[left_r2],        %[left2]                        \n\t"
90
91      "addu.ph         %[average],        %[above_r1],      %[above_l1]   \n\t"
92      "addu.ph         %[average],        %[average],       %[left_l1]    \n\t"
93      "addu.ph         %[average],        %[average],       %[left_r1]    \n\t"
94
95      "addu.ph         %[average],        %[average],       %[above_l2]   \n\t"
96      "addu.ph         %[average],        %[average],       %[above_r2]   \n\t"
97      "addu.ph         %[average],        %[average],       %[left_l2]    \n\t"
98      "addu.ph         %[average],        %[average],       %[left_r2]    \n\t"
99
100      "addiu           %[average],        %[average],       8             \n\t"
101
102      "srl             %[tmp],            %[average],       16            \n\t"
103      "addu.ph         %[average],        %[tmp],           %[average]    \n\t"
104      "srl             %[expected_dc],    %[average],       4             \n\t"
105      "replv.qb        %[expected_dc],    %[expected_dc]                  \n\t"
106
107      "sw              %[expected_dc],    (%[dst])                        \n\t"
108      "sw              %[expected_dc],    4(%[dst])                       \n\t"
109
110      "add             %[dst],             %[dst],          %[stride]     \n\t"
111      "sw              %[expected_dc],    (%[dst])                        \n\t"
112      "sw              %[expected_dc],    4(%[dst])                       \n\t"
113
114      "add             %[dst],             %[dst],          %[stride]     \n\t"
115      "sw              %[expected_dc],    (%[dst])                        \n\t"
116      "sw              %[expected_dc],    4(%[dst])                       \n\t"
117
118      "add             %[dst],             %[dst],          %[stride]     \n\t"
119      "sw              %[expected_dc],    (%[dst])                        \n\t"
120      "sw              %[expected_dc],    4(%[dst])                       \n\t"
121
122      "add             %[dst],             %[dst],          %[stride]     \n\t"
123      "sw              %[expected_dc],    (%[dst])                        \n\t"
124      "sw              %[expected_dc],    4(%[dst])                       \n\t"
125
126      "add             %[dst],             %[dst],          %[stride]     \n\t"
127      "sw              %[expected_dc],    (%[dst])                        \n\t"
128      "sw              %[expected_dc],    4(%[dst])                       \n\t"
129
130      "add             %[dst],             %[dst],          %[stride]     \n\t"
131      "sw              %[expected_dc],    (%[dst])                        \n\t"
132      "sw              %[expected_dc],    4(%[dst])                       \n\t"
133
134      "add             %[dst],             %[dst],          %[stride]     \n\t"
135      "sw              %[expected_dc],    (%[dst])                        \n\t"
136      "sw              %[expected_dc],    4(%[dst])                       \n\t"
137
138      : [above1] "=&r"(above1), [above_l1] "=&r"(above_l1),
139        [above_r1] "=&r"(above_r1), [left1] "=&r"(left1),
140        [left_l1] "=&r"(left_l1), [left_r1] "=&r"(left_r1),
141        [above2] "=&r"(above2), [above_l2] "=&r"(above_l2),
142        [above_r2] "=&r"(above_r2), [left2] "=&r"(left2),
143        [left_l2] "=&r"(left_l2), [left_r2] "=&r"(left_r2),
144        [average] "=&r"(average), [tmp] "=&r"(tmp),
145        [expected_dc] "=&r"(expected_dc)
146      : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
147        [stride] "r"(stride));
148}
149
150void vpx_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
151                                const uint8_t *above, const uint8_t *left) {
152  int32_t abovel, abover;
153  int32_t abovel_1, abover_1;
154  int32_t left0;
155  int32_t res0, res1, res2, res3;
156  int32_t reshw;
157  int32_t top_left;
158  uint8_t *cm = vpx_ff_cropTbl;
159
160  __asm__ __volatile__(
161      "ulw             %[reshw],       (%[above])                         \n\t"
162      "ulw             %[top_left],    4(%[above])                        \n\t"
163
164      "lbu             %[left0],       (%[left])                          \n\t"
165
166      "preceu.ph.qbl   %[abovel],      %[reshw]                           \n\t"
167      "preceu.ph.qbr   %[abover],      %[reshw]                           \n\t"
168      "preceu.ph.qbl   %[abovel_1],    %[top_left]                        \n\t"
169      "preceu.ph.qbr   %[abover_1],    %[top_left]                        \n\t"
170
171      "lbu             %[top_left],    -1(%[above])                       \n\t"
172      "replv.ph        %[left0],       %[left0]                           \n\t"
173
174      "replv.ph        %[top_left],    %[top_left]                        \n\t"
175
176      "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
177      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
178
179      "sll             %[res2],        %[reshw],            16            \n\t"
180      "sra             %[res2],        %[res2],             16            \n\t"
181      "sra             %[res3],        %[reshw],            16            \n\t"
182
183      "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
184      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
185
186      "sll             %[res0],        %[reshw],            16            \n\t"
187      "sra             %[res0],        %[res0],             16            \n\t"
188      "sra             %[res1],        %[reshw],            16            \n\t"
189
190      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
191      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
192      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
193      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
194
195      "sb              %[res0],        (%[dst])                           \n\t"
196      "sb              %[res1],        1(%[dst])                          \n\t"
197      "sb              %[res2],        2(%[dst])                          \n\t"
198      "sb              %[res3],        3(%[dst])                          \n\t"
199
200      "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
201      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
202
203      "sll             %[res2],        %[reshw],            16            \n\t"
204      "sra             %[res2],        %[res2],             16            \n\t"
205      "sra             %[res3],        %[reshw],            16            \n\t"
206
207      "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
208      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
209
210      "sll             %[res0],        %[reshw],            16            \n\t"
211      "sra             %[res0],        %[res0],             16            \n\t"
212      "sra             %[res1],        %[reshw],            16            \n\t"
213
214      "lbu             %[left0],       1(%[left])                         \n\t"
215
216      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
217      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
218      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
219      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
220
221      "sb              %[res0],        4(%[dst])                          \n\t"
222      "sb              %[res1],        5(%[dst])                          \n\t"
223      "sb              %[res2],        6(%[dst])                          \n\t"
224      "sb              %[res3],        7(%[dst])                          \n\t"
225
226      "replv.ph        %[left0],       %[left0]                           \n\t"
227      "add             %[dst],          %[dst],             %[stride]     \n\t"
228
229      "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
230      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
231
232      "sll             %[res2],        %[reshw],            16            \n\t"
233      "sra             %[res2],        %[res2],             16            \n\t"
234      "sra             %[res3],        %[reshw],            16            \n\t"
235
236      "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
237      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
238
239      "sll             %[res0],        %[reshw],            16            \n\t"
240      "sra             %[res0],        %[res0],             16            \n\t"
241      "sra             %[res1],        %[reshw],            16            \n\t"
242
243      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
244      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
245      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
246      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
247
248      "sb              %[res0],        (%[dst])                           \n\t"
249      "sb              %[res1],        1(%[dst])                          \n\t"
250      "sb              %[res2],        2(%[dst])                          \n\t"
251      "sb              %[res3],        3(%[dst])                          \n\t"
252
253      "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
254      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
255
256      "sll             %[res2],        %[reshw],            16            \n\t"
257      "sra             %[res2],        %[res2],             16            \n\t"
258      "sra             %[res3],        %[reshw],            16            \n\t"
259
260      "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
261      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
262
263      "sll             %[res0],        %[reshw],            16            \n\t"
264      "sra             %[res0],        %[res0],             16            \n\t"
265      "sra             %[res1],        %[reshw],            16            \n\t"
266
267      "lbu             %[left0],       2(%[left])                         \n\t"
268
269      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
270      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
271      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
272      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
273
274      "sb              %[res0],        4(%[dst])                          \n\t"
275      "sb              %[res1],        5(%[dst])                          \n\t"
276      "sb              %[res2],        6(%[dst])                          \n\t"
277      "sb              %[res3],        7(%[dst])                          \n\t"
278
279      "replv.ph        %[left0],       %[left0]                           \n\t"
280      "add             %[dst],          %[dst],             %[stride]     \n\t"
281
282      "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
283      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
284
285      "sll             %[res2],        %[reshw],            16            \n\t"
286      "sra             %[res2],        %[res2],             16            \n\t"
287      "sra             %[res3],        %[reshw],            16            \n\t"
288
289      "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
290      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
291
292      "sll             %[res0],        %[reshw],            16            \n\t"
293      "sra             %[res0],        %[res0],             16            \n\t"
294      "sra             %[res1],        %[reshw],            16            \n\t"
295
296      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
297      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
298      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
299      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
300
301      "sb              %[res0],        (%[dst])                           \n\t"
302      "sb              %[res1],        1(%[dst])                          \n\t"
303      "sb              %[res2],        2(%[dst])                          \n\t"
304      "sb              %[res3],        3(%[dst])                          \n\t"
305
306      "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
307      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
308
309      "sll             %[res2],        %[reshw],            16            \n\t"
310      "sra             %[res2],        %[res2],             16            \n\t"
311      "sra             %[res3],        %[reshw],            16            \n\t"
312
313      "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
314      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
315
316      "sll             %[res0],        %[reshw],            16            \n\t"
317      "sra             %[res0],        %[res0],             16            \n\t"
318      "sra             %[res1],        %[reshw],            16            \n\t"
319
320      "lbu             %[left0],       3(%[left])                         \n\t"
321
322      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
323      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
324      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
325      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
326
327      "sb              %[res0],        4(%[dst])                          \n\t"
328      "sb              %[res1],        5(%[dst])                          \n\t"
329      "sb              %[res2],        6(%[dst])                          \n\t"
330      "sb              %[res3],        7(%[dst])                          \n\t"
331
332      "replv.ph        %[left0],       %[left0]                           \n\t"
333      "add             %[dst],          %[dst],             %[stride]     \n\t"
334
335      "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
336      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
337
338      "sll             %[res2],        %[reshw],            16            \n\t"
339      "sra             %[res2],        %[res2],             16            \n\t"
340      "sra             %[res3],        %[reshw],            16            \n\t"
341
342      "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
343      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
344
345      "sll             %[res0],        %[reshw],            16            \n\t"
346      "sra             %[res0],        %[res0],             16            \n\t"
347      "sra             %[res1],        %[reshw],            16            \n\t"
348
349      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
350      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
351      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
352      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
353
354      "sb              %[res0],        (%[dst])                           \n\t"
355      "sb              %[res1],        1(%[dst])                          \n\t"
356      "sb              %[res2],        2(%[dst])                          \n\t"
357      "sb              %[res3],        3(%[dst])                          \n\t"
358
359      "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
360      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
361
362      "sll             %[res2],        %[reshw],            16            \n\t"
363      "sra             %[res2],        %[res2],             16            \n\t"
364      "sra             %[res3],        %[reshw],            16            \n\t"
365
366      "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
367      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
368
369      "sll             %[res0],        %[reshw],            16            \n\t"
370      "sra             %[res0],        %[res0],             16            \n\t"
371      "sra             %[res1],        %[reshw],            16            \n\t"
372
373      "lbu             %[left0],       4(%[left])                         \n\t"
374
375      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
376      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
377      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
378      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
379
380      "sb              %[res0],        4(%[dst])                          \n\t"
381      "sb              %[res1],        5(%[dst])                          \n\t"
382      "sb              %[res2],        6(%[dst])                          \n\t"
383      "sb              %[res3],        7(%[dst])                          \n\t"
384
385      "replv.ph        %[left0],       %[left0]                           \n\t"
386      "add             %[dst],          %[dst],             %[stride]     \n\t"
387
388      "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
389      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
390
391      "sll             %[res2],        %[reshw],            16            \n\t"
392      "sra             %[res2],        %[res2],             16            \n\t"
393      "sra             %[res3],        %[reshw],            16            \n\t"
394
395      "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
396      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
397
398      "sll             %[res0],        %[reshw],            16            \n\t"
399      "sra             %[res0],        %[res0],             16            \n\t"
400      "sra             %[res1],        %[reshw],            16            \n\t"
401
402      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
403      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
404      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
405      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
406
407      "sb              %[res0],        (%[dst])                           \n\t"
408      "sb              %[res1],        1(%[dst])                          \n\t"
409      "sb              %[res2],        2(%[dst])                          \n\t"
410      "sb              %[res3],        3(%[dst])                          \n\t"
411
412      "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
413      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
414
415      "sll             %[res2],        %[reshw],            16            \n\t"
416      "sra             %[res2],        %[res2],             16            \n\t"
417      "sra             %[res3],        %[reshw],            16            \n\t"
418
419      "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
420      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
421
422      "sll             %[res0],        %[reshw],            16            \n\t"
423      "sra             %[res0],        %[res0],             16            \n\t"
424      "sra             %[res1],        %[reshw],            16            \n\t"
425
426      "lbu             %[left0],       5(%[left])                         \n\t"
427
428      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
429      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
430      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
431      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
432
433      "sb              %[res0],        4(%[dst])                          \n\t"
434      "sb              %[res1],        5(%[dst])                          \n\t"
435      "sb              %[res2],        6(%[dst])                          \n\t"
436      "sb              %[res3],        7(%[dst])                          \n\t"
437
438      "replv.ph        %[left0],       %[left0]                           \n\t"
439      "add             %[dst],          %[dst],             %[stride]     \n\t"
440
441      "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
442      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
443
444      "sll             %[res2],        %[reshw],            16            \n\t"
445      "sra             %[res2],        %[res2],             16            \n\t"
446      "sra             %[res3],        %[reshw],            16            \n\t"
447
448      "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
449      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
450
451      "sll             %[res0],        %[reshw],            16            \n\t"
452      "sra             %[res0],        %[res0],             16            \n\t"
453      "sra             %[res1],        %[reshw],            16            \n\t"
454
455      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
456      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
457      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
458      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
459
460      "sb              %[res0],        (%[dst])                           \n\t"
461      "sb              %[res1],        1(%[dst])                          \n\t"
462      "sb              %[res2],        2(%[dst])                          \n\t"
463      "sb              %[res3],        3(%[dst])                          \n\t"
464
465      "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
466      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
467
468      "sll             %[res2],        %[reshw],            16            \n\t"
469      "sra             %[res2],        %[res2],             16            \n\t"
470      "sra             %[res3],        %[reshw],            16            \n\t"
471
472      "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
473      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
474
475      "sll             %[res0],        %[reshw],            16            \n\t"
476      "sra             %[res0],        %[res0],             16            \n\t"
477      "sra             %[res1],        %[reshw],            16            \n\t"
478
479      "lbu             %[left0],       6(%[left])                         \n\t"
480
481      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
482      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
483      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
484      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
485
486      "sb              %[res0],        4(%[dst])                          \n\t"
487      "sb              %[res1],        5(%[dst])                          \n\t"
488      "sb              %[res2],        6(%[dst])                          \n\t"
489      "sb              %[res3],        7(%[dst])                          \n\t"
490
491      "replv.ph        %[left0],       %[left0]                           \n\t"
492      "add             %[dst],          %[dst],             %[stride]     \n\t"
493
494      "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
495      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
496
497      "sll             %[res2],        %[reshw],            16            \n\t"
498      "sra             %[res2],        %[res2],             16            \n\t"
499      "sra             %[res3],        %[reshw],            16            \n\t"
500
501      "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
502      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
503
504      "sll             %[res0],        %[reshw],            16            \n\t"
505      "sra             %[res0],        %[res0],             16            \n\t"
506      "sra             %[res1],        %[reshw],            16            \n\t"
507
508      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
509      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
510      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
511      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
512
513      "sb              %[res0],        (%[dst])                           \n\t"
514      "sb              %[res1],        1(%[dst])                          \n\t"
515      "sb              %[res2],        2(%[dst])                          \n\t"
516      "sb              %[res3],        3(%[dst])                          \n\t"
517
518      "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
519      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
520
521      "sll             %[res2],        %[reshw],            16            \n\t"
522      "sra             %[res2],        %[res2],             16            \n\t"
523      "sra             %[res3],        %[reshw],            16            \n\t"
524
525      "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
526      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
527
528      "sll             %[res0],        %[reshw],            16            \n\t"
529      "sra             %[res0],        %[res0],             16            \n\t"
530      "sra             %[res1],        %[reshw],            16            \n\t"
531
532      "lbu             %[left0],       7(%[left])                         \n\t"
533
534      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
535      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
536      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
537      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
538
539      "sb              %[res0],        4(%[dst])                          \n\t"
540      "sb              %[res1],        5(%[dst])                          \n\t"
541      "sb              %[res2],        6(%[dst])                          \n\t"
542      "sb              %[res3],        7(%[dst])                          \n\t"
543
544      "replv.ph        %[left0],       %[left0]                           \n\t"
545      "add             %[dst],          %[dst],             %[stride]     \n\t"
546
547      "addu.ph         %[reshw],       %[abovel],           %[left0]      \n\t"
548      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
549
550      "sll             %[res2],        %[reshw],            16            \n\t"
551      "sra             %[res2],        %[res2],             16            \n\t"
552      "sra             %[res3],        %[reshw],            16            \n\t"
553
554      "addu.ph         %[reshw],       %[abover],           %[left0]      \n\t"
555      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
556
557      "sll             %[res0],        %[reshw],            16            \n\t"
558      "sra             %[res0],        %[res0],             16            \n\t"
559      "sra             %[res1],        %[reshw],            16            \n\t"
560
561      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
562      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
563      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
564      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
565
566      "sb              %[res0],        (%[dst])                           \n\t"
567      "sb              %[res1],        1(%[dst])                          \n\t"
568      "sb              %[res2],        2(%[dst])                          \n\t"
569      "sb              %[res3],        3(%[dst])                          \n\t"
570
571      "addu.ph         %[reshw],       %[abovel_1],         %[left0]      \n\t"
572      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
573
574      "sll             %[res2],        %[reshw],            16            \n\t"
575      "sra             %[res2],        %[res2],             16            \n\t"
576      "sra             %[res3],        %[reshw],            16            \n\t"
577
578      "addu.ph         %[reshw],       %[abover_1],         %[left0]      \n\t"
579      "subu.ph         %[reshw],       %[reshw],            %[top_left]   \n\t"
580
581      "sll             %[res0],        %[reshw],            16            \n\t"
582      "sra             %[res0],        %[res0],             16            \n\t"
583      "sra             %[res1],        %[reshw],            16            \n\t"
584
585      "lbux            %[res0],        %[res0](%[cm])                     \n\t"
586      "lbux            %[res1],        %[res1](%[cm])                     \n\t"
587      "lbux            %[res2],        %[res2](%[cm])                     \n\t"
588      "lbux            %[res3],        %[res3](%[cm])                     \n\t"
589
590      "sb              %[res0],        4(%[dst])                          \n\t"
591      "sb              %[res1],        5(%[dst])                          \n\t"
592      "sb              %[res2],        6(%[dst])                          \n\t"
593      "sb              %[res3],        7(%[dst])                          \n\t"
594
595      : [abovel] "=&r"(abovel), [abover] "=&r"(abover),
596        [abovel_1] "=&r"(abovel_1), [abover_1] "=&r"(abover_1),
597        [left0] "=&r"(left0), [res2] "=&r"(res2), [res3] "=&r"(res3),
598        [res0] "=&r"(res0), [res1] "=&r"(res1), [reshw] "=&r"(reshw),
599        [top_left] "=&r"(top_left)
600      : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
601        [stride] "r"(stride), [cm] "r"(cm));
602}
603#endif  // #if HAVE_DSPR2
604