1/*
2 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_
12#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_
13
14#include <stdlib.h>
15
16#include "./vpx_dsp_rtcd.h"
17#include "vpx/vpx_integer.h"
18#include "vpx_mem/vpx_mem.h"
19
20#ifdef __cplusplus
21extern "C" {
22#endif
23
24#if HAVE_DSPR2
// STORE_F0: scatter the four packed values p1_f0, p0_f0, q0_f0 and q1_f0 to
// the four locations addressed by s4, s3, s2 and s1, one byte per value per
// location.
//
// For every pointer sN the macro writes p1_f0 to sN[-2], p0_f0 to sN[-1],
// q0_f0 to sN[0] and q1_f0 to sN[1] (byte stores). s4 receives bits 7..0 of
// each value; the values are shifted right by 8 before each following store,
// so s3 receives bits 15..8, s2 bits 23..16 and s1 bits 31..24.
//
// Expansion-site requirements: p1_f0, p0_f0, q0_f0, q1_f0, s1, s2, s3 and s4
// must be in scope. The four *_f0 variables are modified (shifted right by 24
// bits in total). NOTE(review): presumably the *_f0 values are 32-bit words
// holding one filtered pixel per byte -- confirm at the call sites.
//
// The store statements carry a "memory" clobber: they write through s1..s4,
// which the compiler cannot infer from register-only operand lists. The macro
// deliberately stays a bare compound statement (not do/while) so existing
// invocations, with or without a trailing semicolon, keep compiling.
#define STORE_F0()                                                       \
  {                                                                      \
    /* Bits 7..0 of each value go to s4. */                              \
    __asm__ __volatile__(                                                \
        "sb     %[q1_f0],    1(%[s4])           \n\t"                    \
        "sb     %[q0_f0],    0(%[s4])           \n\t"                    \
        "sb     %[p0_f0],   -1(%[s4])           \n\t"                    \
        "sb     %[p1_f0],   -2(%[s4])           \n\t"                    \
                                                                         \
        :                                                                \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
          [p1_f0] "r"(p1_f0), [s4] "r"(s4)                               \
        : "memory");                                                     \
                                                                         \
    /* Bring the next byte of each value down to bits 7..0. */           \
    __asm__ __volatile__(                                                \
        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
                                                                         \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0)                                            \
        :);                                                              \
                                                                         \
    /* Original bits 15..8 go to s3. The stores only read the values, */ \
    /* so every value is a plain input operand here.                  */ \
    __asm__ __volatile__(                                                \
        "sb     %[q1_f0],    1(%[s3])           \n\t"                    \
        "sb     %[q0_f0],    0(%[s3])           \n\t"                    \
        "sb     %[p0_f0],   -1(%[s3])           \n\t"                    \
        "sb     %[p1_f0],   -2(%[s3])           \n\t"                    \
                                                                         \
        :                                                                \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
          [p1_f0] "r"(p1_f0), [s3] "r"(s3)                               \
        : "memory");                                                     \
                                                                         \
    __asm__ __volatile__(                                                \
        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
                                                                         \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0)                                            \
        :);                                                              \
                                                                         \
    /* Original bits 23..16 go to s2. */                                 \
    __asm__ __volatile__(                                                \
        "sb     %[q1_f0],    1(%[s2])           \n\t"                    \
        "sb     %[q0_f0],    0(%[s2])           \n\t"                    \
        "sb     %[p0_f0],   -1(%[s2])           \n\t"                    \
        "sb     %[p1_f0],   -2(%[s2])           \n\t"                    \
                                                                         \
        :                                                                \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
          [p1_f0] "r"(p1_f0), [s2] "r"(s2)                               \
        : "memory");                                                     \
                                                                         \
    __asm__ __volatile__(                                                \
        "srl    %[q1_f0],   %[q1_f0],   8       \n\t"                    \
        "srl    %[q0_f0],   %[q0_f0],   8       \n\t"                    \
        "srl    %[p0_f0],   %[p0_f0],   8       \n\t"                    \
        "srl    %[p1_f0],   %[p1_f0],   8       \n\t"                    \
                                                                         \
        : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
          [p1_f0] "+r"(p1_f0)                                            \
        :);                                                              \
                                                                         \
    /* Original bits 31..24 go to s1. */                                 \
    __asm__ __volatile__(                                                \
        "sb     %[q1_f0],    1(%[s1])           \n\t"                    \
        "sb     %[q0_f0],    0(%[s1])           \n\t"                    \
        "sb     %[p0_f0],   -1(%[s1])           \n\t"                    \
        "sb     %[p1_f0],   -2(%[s1])           \n\t"                    \
                                                                         \
        :                                                                \
        : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0),    \
          [p1_f0] "r"(p1_f0), [s1] "r"(s1)                               \
        : "memory");                                                     \
  }
97
// STORE_F1: scatter six "right" values (p2_r..q2_r) and six "left" values
// (p2_l..q2_l) to the four locations addressed by s4, s3, s2 and s1.
//
// For every pointer sN one byte per tap is written: p2 at sN[-3], p1 at
// sN[-2], p0 at sN[-1], q0 at sN[0], q1 at sN[1] and q2 at sN[2].
//   s4 <- bits  7..0  of the *_r values
//   s3 <- bits 23..16 of the *_r values (after a 16-bit right shift)
//   s2 <- bits  7..0  of the *_l values
//   s1 <- bits 23..16 of the *_l values (after a 16-bit right shift)
//
// Expansion-site requirements: p2_r..q2_r, p2_l..q2_l and s1..s4 must be in
// scope. All twelve *_r / *_l variables are modified (shifted right by 16).
// NOTE(review): presumably each *_r / *_l word holds two 16-bit lanes as
// produced by the PACK_RIGHT_* / PACK_LEFT_* macros -- confirm at call sites.
//
// The store statements carry a "memory" clobber: they write through s1..s4,
// which the compiler cannot infer from register-only operand lists. The macro
// deliberately stays a bare compound statement (not do/while) so existing
// invocations, with or without a trailing semicolon, keep compiling.
#define STORE_F1()                                                             \
  {                                                                            \
    /* Low lane of the right-hand values goes to s4. */                        \
    __asm__ __volatile__(                                                      \
        "sb     %[q2_r],     2(%[s4])           \n\t"                          \
        "sb     %[q1_r],     1(%[s4])           \n\t"                          \
        "sb     %[q0_r],     0(%[s4])           \n\t"                          \
        "sb     %[p0_r],    -1(%[s4])           \n\t"                          \
        "sb     %[p1_r],    -2(%[s4])           \n\t"                          \
        "sb     %[p2_r],    -3(%[s4])           \n\t"                          \
                                                                               \
        :                                                                      \
        : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r),                \
          [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s4] "r"(s4)   \
        : "memory");                                                           \
                                                                               \
    /* Move the high lane of the right-hand values down. */                    \
    __asm__ __volatile__(                                                      \
        "srl    %[q2_r],    %[q2_r],    16      \n\t"                          \
        "srl    %[q1_r],    %[q1_r],    16      \n\t"                          \
        "srl    %[q0_r],    %[q0_r],    16      \n\t"                          \
        "srl    %[p0_r],    %[p0_r],    16      \n\t"                          \
        "srl    %[p1_r],    %[p1_r],    16      \n\t"                          \
        "srl    %[p2_r],    %[p2_r],    16      \n\t"                          \
                                                                               \
        : [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), [q0_r] "+r"(q0_r),             \
          [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), [p2_r] "+r"(p2_r)              \
        :);                                                                    \
                                                                               \
    /* High lane of the right-hand values goes to s3. */                       \
    __asm__ __volatile__(                                                      \
        "sb     %[q2_r],     2(%[s3])           \n\t"                          \
        "sb     %[q1_r],     1(%[s3])           \n\t"                          \
        "sb     %[q0_r],     0(%[s3])           \n\t"                          \
        "sb     %[p0_r],    -1(%[s3])           \n\t"                          \
        "sb     %[p1_r],    -2(%[s3])           \n\t"                          \
        "sb     %[p2_r],    -3(%[s3])           \n\t"                          \
                                                                               \
        :                                                                      \
        : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r),                \
          [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s3] "r"(s3)   \
        : "memory");                                                           \
                                                                               \
    /* Low lane of the left-hand values goes to s2. */                         \
    __asm__ __volatile__(                                                      \
        "sb     %[q2_l],     2(%[s2])           \n\t"                          \
        "sb     %[q1_l],     1(%[s2])           \n\t"                          \
        "sb     %[q0_l],     0(%[s2])           \n\t"                          \
        "sb     %[p0_l],    -1(%[s2])           \n\t"                          \
        "sb     %[p1_l],    -2(%[s2])           \n\t"                          \
        "sb     %[p2_l],    -3(%[s2])           \n\t"                          \
                                                                               \
        :                                                                      \
        : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l),                \
          [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s2] "r"(s2)   \
        : "memory");                                                           \
                                                                               \
    /* Move the high lane of the left-hand values down. */                     \
    __asm__ __volatile__(                                                      \
        "srl    %[q2_l],    %[q2_l],    16      \n\t"                          \
        "srl    %[q1_l],    %[q1_l],    16      \n\t"                          \
        "srl    %[q0_l],    %[q0_l],    16      \n\t"                          \
        "srl    %[p0_l],    %[p0_l],    16      \n\t"                          \
        "srl    %[p1_l],    %[p1_l],    16      \n\t"                          \
        "srl    %[p2_l],    %[p2_l],    16      \n\t"                          \
                                                                               \
        : [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), [q0_l] "+r"(q0_l),             \
          [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), [p2_l] "+r"(p2_l)              \
        :);                                                                    \
                                                                               \
    /* High lane of the left-hand values goes to s1. */                        \
    __asm__ __volatile__(                                                      \
        "sb     %[q2_l],     2(%[s1])           \n\t"                          \
        "sb     %[q1_l],     1(%[s1])           \n\t"                          \
        "sb     %[q0_l],     0(%[s1])           \n\t"                          \
        "sb     %[p0_l],    -1(%[s1])           \n\t"                          \
        "sb     %[p1_l],    -2(%[s1])           \n\t"                          \
        "sb     %[p2_l],    -3(%[s1])           \n\t"                          \
                                                                               \
        :                                                                      \
        : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l),                \
          [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s1] "r"(s1)   \
        : "memory");                                                           \
  }
172
// STORE_F2: scatter fourteen "right" values (p6_r..q6_r) and fourteen "left"
// values (p6_l..q6_l) to the four locations addressed by s4, s3, s2 and s1.
//
// For every pointer sN one byte per tap is written: p6 at sN[-7] up through
// p0 at sN[-1], then q0 at sN[0] up through q6 at sN[6].
//   s4 <- bits  7..0  of the *_r values
//   s3 <- bits 23..16 of the *_r values (after a 16-bit right shift)
//   s2 <- bits  7..0  of the *_l values
//   s1 <- bits 23..16 of the *_l values (after a 16-bit right shift)
//
// Expansion-site requirements: p6_r..q6_r, p6_l..q6_l and s1..s4 must be in
// scope. All twenty-eight *_r / *_l variables are modified (shifted right by
// 16). NOTE(review): presumably each *_r / *_l word holds two 16-bit lanes as
// produced by the PACK_RIGHT_* / PACK_LEFT_* macros -- confirm at call sites.
//
// The store statements carry a "memory" clobber: they write through s1..s4,
// which the compiler cannot infer from register-only operand lists. The macro
// deliberately stays a bare compound statement (not do/while) so existing
// invocations, with or without a trailing semicolon, keep compiling.
#define STORE_F2()                                                 \
  {                                                                \
    /* Low lane of the right-hand values goes to s4. */            \
    __asm__ __volatile__(                                          \
        "sb     %[q6_r],     6(%[s4])           \n\t"              \
        "sb     %[q5_r],     5(%[s4])           \n\t"              \
        "sb     %[q4_r],     4(%[s4])           \n\t"              \
        "sb     %[q3_r],     3(%[s4])           \n\t"              \
        "sb     %[q2_r],     2(%[s4])           \n\t"              \
        "sb     %[q1_r],     1(%[s4])           \n\t"              \
        "sb     %[q0_r],     0(%[s4])           \n\t"              \
        "sb     %[p0_r],    -1(%[s4])           \n\t"              \
        "sb     %[p1_r],    -2(%[s4])           \n\t"              \
        "sb     %[p2_r],    -3(%[s4])           \n\t"              \
        "sb     %[p3_r],    -4(%[s4])           \n\t"              \
        "sb     %[p4_r],    -5(%[s4])           \n\t"              \
        "sb     %[p5_r],    -6(%[s4])           \n\t"              \
        "sb     %[p6_r],    -7(%[s4])           \n\t"              \
                                                                   \
        :                                                          \
        : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r),    \
          [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r),    \
          [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r),    \
          [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r),    \
          [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s4] "r"(s4)         \
        : "memory");                                               \
                                                                   \
    /* Move the high lane of the right-hand values down. */        \
    __asm__ __volatile__(                                          \
        "srl    %[q6_r],    %[q6_r],    16      \n\t"              \
        "srl    %[q5_r],    %[q5_r],    16      \n\t"              \
        "srl    %[q4_r],    %[q4_r],    16      \n\t"              \
        "srl    %[q3_r],    %[q3_r],    16      \n\t"              \
        "srl    %[q2_r],    %[q2_r],    16      \n\t"              \
        "srl    %[q1_r],    %[q1_r],    16      \n\t"              \
        "srl    %[q0_r],    %[q0_r],    16      \n\t"              \
        "srl    %[p0_r],    %[p0_r],    16      \n\t"              \
        "srl    %[p1_r],    %[p1_r],    16      \n\t"              \
        "srl    %[p2_r],    %[p2_r],    16      \n\t"              \
        "srl    %[p3_r],    %[p3_r],    16      \n\t"              \
        "srl    %[p4_r],    %[p4_r],    16      \n\t"              \
        "srl    %[p5_r],    %[p5_r],    16      \n\t"              \
        "srl    %[p6_r],    %[p6_r],    16      \n\t"              \
                                                                   \
        : [q6_r] "+r"(q6_r), [q5_r] "+r"(q5_r), [q4_r] "+r"(q4_r), \
          [q3_r] "+r"(q3_r), [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), \
          [q0_r] "+r"(q0_r), [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), \
          [p2_r] "+r"(p2_r), [p3_r] "+r"(p3_r), [p4_r] "+r"(p4_r), \
          [p5_r] "+r"(p5_r), [p6_r] "+r"(p6_r)                     \
        :);                                                        \
                                                                   \
    /* High lane of the right-hand values goes to s3. */           \
    __asm__ __volatile__(                                          \
        "sb     %[q6_r],     6(%[s3])           \n\t"              \
        "sb     %[q5_r],     5(%[s3])           \n\t"              \
        "sb     %[q4_r],     4(%[s3])           \n\t"              \
        "sb     %[q3_r],     3(%[s3])           \n\t"              \
        "sb     %[q2_r],     2(%[s3])           \n\t"              \
        "sb     %[q1_r],     1(%[s3])           \n\t"              \
        "sb     %[q0_r],     0(%[s3])           \n\t"              \
        "sb     %[p0_r],    -1(%[s3])           \n\t"              \
        "sb     %[p1_r],    -2(%[s3])           \n\t"              \
        "sb     %[p2_r],    -3(%[s3])           \n\t"              \
        "sb     %[p3_r],    -4(%[s3])           \n\t"              \
        "sb     %[p4_r],    -5(%[s3])           \n\t"              \
        "sb     %[p5_r],    -6(%[s3])           \n\t"              \
        "sb     %[p6_r],    -7(%[s3])           \n\t"              \
                                                                   \
        :                                                          \
        : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r),    \
          [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r),    \
          [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r),    \
          [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r),    \
          [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s3] "r"(s3)         \
        : "memory");                                               \
                                                                   \
    /* Low lane of the left-hand values goes to s2. */             \
    __asm__ __volatile__(                                          \
        "sb     %[q6_l],     6(%[s2])           \n\t"              \
        "sb     %[q5_l],     5(%[s2])           \n\t"              \
        "sb     %[q4_l],     4(%[s2])           \n\t"              \
        "sb     %[q3_l],     3(%[s2])           \n\t"              \
        "sb     %[q2_l],     2(%[s2])           \n\t"              \
        "sb     %[q1_l],     1(%[s2])           \n\t"              \
        "sb     %[q0_l],     0(%[s2])           \n\t"              \
        "sb     %[p0_l],    -1(%[s2])           \n\t"              \
        "sb     %[p1_l],    -2(%[s2])           \n\t"              \
        "sb     %[p2_l],    -3(%[s2])           \n\t"              \
        "sb     %[p3_l],    -4(%[s2])           \n\t"              \
        "sb     %[p4_l],    -5(%[s2])           \n\t"              \
        "sb     %[p5_l],    -6(%[s2])           \n\t"              \
        "sb     %[p6_l],    -7(%[s2])           \n\t"              \
                                                                   \
        :                                                          \
        : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l),    \
          [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l),    \
          [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l),    \
          [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l),    \
          [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s2] "r"(s2)         \
        : "memory");                                               \
                                                                   \
    /* Move the high lane of the left-hand values down. */         \
    __asm__ __volatile__(                                          \
        "srl    %[q6_l],    %[q6_l],    16     \n\t"               \
        "srl    %[q5_l],    %[q5_l],    16     \n\t"               \
        "srl    %[q4_l],    %[q4_l],    16     \n\t"               \
        "srl    %[q3_l],    %[q3_l],    16     \n\t"               \
        "srl    %[q2_l],    %[q2_l],    16     \n\t"               \
        "srl    %[q1_l],    %[q1_l],    16     \n\t"               \
        "srl    %[q0_l],    %[q0_l],    16     \n\t"               \
        "srl    %[p0_l],    %[p0_l],    16     \n\t"               \
        "srl    %[p1_l],    %[p1_l],    16     \n\t"               \
        "srl    %[p2_l],    %[p2_l],    16     \n\t"               \
        "srl    %[p3_l],    %[p3_l],    16     \n\t"               \
        "srl    %[p4_l],    %[p4_l],    16     \n\t"               \
        "srl    %[p5_l],    %[p5_l],    16     \n\t"               \
        "srl    %[p6_l],    %[p6_l],    16     \n\t"               \
                                                                   \
        : [q6_l] "+r"(q6_l), [q5_l] "+r"(q5_l), [q4_l] "+r"(q4_l), \
          [q3_l] "+r"(q3_l), [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), \
          [q0_l] "+r"(q0_l), [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), \
          [p2_l] "+r"(p2_l), [p3_l] "+r"(p3_l), [p4_l] "+r"(p4_l), \
          [p5_l] "+r"(p5_l), [p6_l] "+r"(p6_l)                     \
        :);                                                        \
                                                                   \
    /* High lane of the left-hand values goes to s1. */            \
    __asm__ __volatile__(                                          \
        "sb     %[q6_l],     6(%[s1])           \n\t"              \
        "sb     %[q5_l],     5(%[s1])           \n\t"              \
        "sb     %[q4_l],     4(%[s1])           \n\t"              \
        "sb     %[q3_l],     3(%[s1])           \n\t"              \
        "sb     %[q2_l],     2(%[s1])           \n\t"              \
        "sb     %[q1_l],     1(%[s1])           \n\t"              \
        "sb     %[q0_l],     0(%[s1])           \n\t"              \
        "sb     %[p0_l],    -1(%[s1])           \n\t"              \
        "sb     %[p1_l],    -2(%[s1])           \n\t"              \
        "sb     %[p2_l],    -3(%[s1])           \n\t"              \
        "sb     %[p3_l],    -4(%[s1])           \n\t"              \
        "sb     %[p4_l],    -5(%[s1])           \n\t"              \
        "sb     %[p5_l],    -6(%[s1])           \n\t"              \
        "sb     %[p6_l],    -7(%[s1])           \n\t"              \
                                                                   \
        :                                                          \
        : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l),    \
          [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l),    \
          [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l),    \
          [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l),    \
          [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s1] "r"(s1)         \
        : "memory");                                               \
  }
313
// PACK_LEFT_0TO3: widen the two most-significant ("left") bytes of each of
// p3..p0 and q0..q3 into two zero-extended 16-bit lanes, writing the results
// to p3_l..p0_l and q0_l..q3_l (preceu.ph.qbl).
//
// Expansion-site requirements: the eight source variables and the eight *_l
// destination variables must be in scope; the sources are only read. The
// destinations are early-clobber outputs, so none of them shares a register
// with a source that a later instruction still has to read. The eight
// instructions are mutually independent and are listed from tap 0 outward.
#define PACK_LEFT_0TO3()                                              \
  {                                                                   \
    __asm__ __volatile__(                                             \
        "preceu.ph.qbl   %[p0_l],   %[p0]   \n\t"                     \
        "preceu.ph.qbl   %[q0_l],   %[q0]   \n\t"                     \
        "preceu.ph.qbl   %[p1_l],   %[p1]   \n\t"                     \
        "preceu.ph.qbl   %[q1_l],   %[q1]   \n\t"                     \
        "preceu.ph.qbl   %[p2_l],   %[p2]   \n\t"                     \
        "preceu.ph.qbl   %[q2_l],   %[q2]   \n\t"                     \
        "preceu.ph.qbl   %[p3_l],   %[p3]   \n\t"                     \
        "preceu.ph.qbl   %[q3_l],   %[q3]   \n\t"                     \
                                                                      \
        : [p0_l] "=&r"(p0_l), [q0_l] "=&r"(q0_l),                     \
          [p1_l] "=&r"(p1_l), [q1_l] "=&r"(q1_l),                     \
          [p2_l] "=&r"(p2_l), [q2_l] "=&r"(q2_l),                     \
          [p3_l] "=&r"(p3_l), [q3_l] "=&r"(q3_l)                      \
        : [p0] "r"(p0), [q0] "r"(q0),                                 \
          [p1] "r"(p1), [q1] "r"(q1),                                 \
          [p2] "r"(p2), [q2] "r"(q2),                                 \
          [p3] "r"(p3), [q3] "r"(q3));                                \
  }
332
// PACK_LEFT_4TO7: widen the two most-significant ("left") bytes of each of
// p7..p4 and q4..q7 into two zero-extended 16-bit lanes, writing the results
// to p7_l..p4_l and q4_l..q7_l (preceu.ph.qbl).
//
// Expansion-site requirements: the eight source variables and the eight *_l
// destination variables must be in scope; the sources are only read. The
// destinations are early-clobber outputs, so none of them shares a register
// with a source that a later instruction still has to read. The eight
// instructions are mutually independent and are listed from tap 4 outward.
#define PACK_LEFT_4TO7()                                              \
  {                                                                   \
    __asm__ __volatile__(                                             \
        "preceu.ph.qbl   %[p4_l],   %[p4]   \n\t"                     \
        "preceu.ph.qbl   %[q4_l],   %[q4]   \n\t"                     \
        "preceu.ph.qbl   %[p5_l],   %[p5]   \n\t"                     \
        "preceu.ph.qbl   %[q5_l],   %[q5]   \n\t"                     \
        "preceu.ph.qbl   %[p6_l],   %[p6]   \n\t"                     \
        "preceu.ph.qbl   %[q6_l],   %[q6]   \n\t"                     \
        "preceu.ph.qbl   %[p7_l],   %[p7]   \n\t"                     \
        "preceu.ph.qbl   %[q7_l],   %[q7]   \n\t"                     \
                                                                      \
        : [p4_l] "=&r"(p4_l), [q4_l] "=&r"(q4_l),                     \
          [p5_l] "=&r"(p5_l), [q5_l] "=&r"(q5_l),                     \
          [p6_l] "=&r"(p6_l), [q6_l] "=&r"(q6_l),                     \
          [p7_l] "=&r"(p7_l), [q7_l] "=&r"(q7_l)                      \
        : [p4] "r"(p4), [q4] "r"(q4),                                 \
          [p5] "r"(p5), [q5] "r"(q5),                                 \
          [p6] "r"(p6), [q6] "r"(q6),                                 \
          [p7] "r"(p7), [q7] "r"(q7));                                \
  }
351
// PACK_RIGHT_0TO3: widen the two least-significant ("right") bytes of each of
// p3..p0 and q0..q3 into two zero-extended 16-bit lanes, writing the results
// to p3_r..p0_r and q0_r..q3_r (preceu.ph.qbr).
//
// Expansion-site requirements: the eight source variables and the eight *_r
// destination variables must be in scope; the sources are only read. The
// destinations are early-clobber outputs, so none of them shares a register
// with a source that a later instruction still has to read. The eight
// instructions are mutually independent and are listed from tap 0 outward.
#define PACK_RIGHT_0TO3()                                             \
  {                                                                   \
    __asm__ __volatile__(                                             \
        "preceu.ph.qbr   %[p0_r],   %[p0]   \n\t"                     \
        "preceu.ph.qbr   %[q0_r],   %[q0]   \n\t"                     \
        "preceu.ph.qbr   %[p1_r],   %[p1]   \n\t"                     \
        "preceu.ph.qbr   %[q1_r],   %[q1]   \n\t"                     \
        "preceu.ph.qbr   %[p2_r],   %[p2]   \n\t"                     \
        "preceu.ph.qbr   %[q2_r],   %[q2]   \n\t"                     \
        "preceu.ph.qbr   %[p3_r],   %[p3]  \n\t"                      \
        "preceu.ph.qbr   %[q3_r],   %[q3]   \n\t"                     \
                                                                      \
        : [p0_r] "=&r"(p0_r), [q0_r] "=&r"(q0_r),                     \
          [p1_r] "=&r"(p1_r), [q1_r] "=&r"(q1_r),                     \
          [p2_r] "=&r"(p2_r), [q2_r] "=&r"(q2_r),                     \
          [p3_r] "=&r"(p3_r), [q3_r] "=&r"(q3_r)                      \
        : [p0] "r"(p0), [q0] "r"(q0),                                 \
          [p1] "r"(p1), [q1] "r"(q1),                                 \
          [p2] "r"(p2), [q2] "r"(q2),                                 \
          [p3] "r"(p3), [q3] "r"(q3));                                \
  }
370
// PACK_RIGHT_4TO7: widen the two least-significant ("right") bytes of each of
// p7..p4 and q4..q7 into two zero-extended 16-bit lanes, writing the results
// to p7_r..p4_r and q4_r..q7_r (preceu.ph.qbr).
//
// Expansion-site requirements: the eight source variables and the eight *_r
// destination variables must be in scope; the sources are only read. The
// destinations are early-clobber outputs, so none of them shares a register
// with a source that a later instruction still has to read. The eight
// instructions are mutually independent and are listed from tap 4 outward.
#define PACK_RIGHT_4TO7()                                             \
  {                                                                   \
    __asm__ __volatile__(                                             \
        "preceu.ph.qbr   %[p4_r],   %[p4]   \n\t"                     \
        "preceu.ph.qbr   %[q4_r],   %[q4]   \n\t"                     \
        "preceu.ph.qbr   %[p5_r],   %[p5]   \n\t"                     \
        "preceu.ph.qbr   %[q5_r],   %[q5]   \n\t"                     \
        "preceu.ph.qbr   %[p6_r],   %[p6]   \n\t"                     \
        "preceu.ph.qbr   %[q6_r],   %[q6]   \n\t"                     \
        "preceu.ph.qbr   %[p7_r],   %[p7]   \n\t"                     \
        "preceu.ph.qbr   %[q7_r],   %[q7]   \n\t"                     \
                                                                      \
        : [p4_r] "=&r"(p4_r), [q4_r] "=&r"(q4_r),                     \
          [p5_r] "=&r"(p5_r), [q5_r] "=&r"(q5_r),                     \
          [p6_r] "=&r"(p6_r), [q6_r] "=&r"(q6_r),                     \
          [p7_r] "=&r"(p7_r), [q7_r] "=&r"(q7_r)                      \
        : [p4] "r"(p4), [q4] "r"(q4),                                 \
          [p5] "r"(p5), [q5] "r"(q5),                                 \
          [p6] "r"(p6), [q6] "r"(q6),                                 \
          [p7] "r"(p7), [q7] "r"(q7));                                \
  }
389
// COMBINE_LEFT_RIGHT_0TO2: rebuild the packed words p2, p1, p0, q0, q1 and q2
// from their 16-bit-lane halves (precr.qb.ph). For each tap X, the low byte
// of each lane of X_l supplies the two most-significant bytes of X and the
// low byte of each lane of X_r supplies the two least-significant bytes.
//
// Expansion-site requirements: p2..q2 plus the matching *_l and *_r variables
// must be in scope; the *_l / *_r variables are only read. The destinations
// are early-clobber outputs, so none of them shares a register with a source
// that a later instruction still has to read. The six instructions are
// mutually independent and are listed from tap 0 outward.
#define COMBINE_LEFT_RIGHT_0TO2()                                         \
  {                                                                       \
    __asm__ __volatile__(                                                 \
        "precr.qb.ph    %[p0],  %[p0_l],    %[p0_r]    \n\t"              \
        "precr.qb.ph    %[q0],  %[q0_l],    %[q0_r]    \n\t"              \
        "precr.qb.ph    %[p1],  %[p1_l],    %[p1_r]    \n\t"              \
        "precr.qb.ph    %[q1],  %[q1_l],    %[q1_r]    \n\t"              \
        "precr.qb.ph    %[p2],  %[p2_l],    %[p2_r]    \n\t"              \
        "precr.qb.ph    %[q2],  %[q2_l],    %[q2_r]    \n\t"              \
                                                                          \
        : [p0] "=&r"(p0), [q0] "=&r"(q0),                                 \
          [p1] "=&r"(p1), [q1] "=&r"(q1),                                 \
          [p2] "=&r"(p2), [q2] "=&r"(q2)                                  \
        : [p0_l] "r"(p0_l), [p0_r] "r"(p0_r),                             \
          [q0_l] "r"(q0_l), [q0_r] "r"(q0_r),                             \
          [p1_l] "r"(p1_l), [p1_r] "r"(p1_r),                             \
          [q1_l] "r"(q1_l), [q1_r] "r"(q1_r),                             \
          [p2_l] "r"(p2_l), [p2_r] "r"(p2_r),                             \
          [q2_l] "r"(q2_l), [q2_r] "r"(q2_r));                            \
  }
407
// COMBINE_LEFT_RIGHT_3TO6: rebuild the packed words p6..p3 and q3..q6 from
// their 16-bit-lane halves (precr.qb.ph). For each tap X, the low byte of
// each lane of X_l supplies the two most-significant bytes of X and the low
// byte of each lane of X_r supplies the two least-significant bytes.
//
// Expansion-site requirements: p6..p3 and q3..q6 plus the matching *_l and
// *_r variables must be in scope; the *_l / *_r variables are only read. The
// destinations are early-clobber outputs, so none of them shares a register
// with a source that a later instruction still has to read. The eight
// instructions are mutually independent and are listed from tap 3 outward.
#define COMBINE_LEFT_RIGHT_3TO6()                                         \
  {                                                                       \
    __asm__ __volatile__(                                                 \
        "precr.qb.ph    %[p3],  %[p3_l],    %[p3_r]    \n\t"              \
        "precr.qb.ph    %[q3],  %[q3_l],    %[q3_r]    \n\t"              \
        "precr.qb.ph    %[p4],  %[p4_l],    %[p4_r]    \n\t"              \
        "precr.qb.ph    %[q4],  %[q4_l],    %[q4_r]    \n\t"              \
        "precr.qb.ph    %[p5],  %[p5_l],    %[p5_r]    \n\t"              \
        "precr.qb.ph    %[q5],  %[q5_l],    %[q5_r]    \n\t"              \
        "precr.qb.ph    %[p6],  %[p6_l],    %[p6_r]    \n\t"              \
        "precr.qb.ph    %[q6],  %[q6_l],    %[q6_r]    \n\t"              \
                                                                          \
        : [p3] "=&r"(p3), [q3] "=&r"(q3),                                 \
          [p4] "=&r"(p4), [q4] "=&r"(q4),                                 \
          [p5] "=&r"(p5), [q5] "=&r"(q5),                                 \
          [p6] "=&r"(p6), [q6] "=&r"(q6)                                  \
        : [p3_l] "r"(p3_l), [p3_r] "r"(p3_r),                             \
          [q3_l] "r"(q3_l), [q3_r] "r"(q3_r),                             \
          [p4_l] "r"(p4_l), [p4_r] "r"(p4_r),                             \
          [q4_l] "r"(q4_l), [q4_r] "r"(q4_r),                             \
          [p5_l] "r"(p5_l), [p5_r] "r"(p5_r),                             \
          [q5_l] "r"(q5_l), [q5_r] "r"(q5_r),                             \
          [p6_l] "r"(p6_l), [p6_r] "r"(p6_r),                             \
          [q6_l] "r"(q6_l), [q6_r] "r"(q6_r));                            \
  }
429
430#endif  // #if HAVE_DSPR2
431#ifdef __cplusplus
432}  // extern "C"
433#endif
434
435#endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MACROS_DSPR2_H_
436