loopfilter_filters_dspr2.h revision 7ce0a1d1337c01056ba24006efab21f00e179e04
/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_
#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_

#include <stdlib.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#ifdef __cplusplus
extern "C" {
#endif

#if HAVE_DSPR2
/* inputs & outputs are quad-byte vectors */
/*
 * 4-tap loop filter, MIPS DSP-R2 version.
 *
 * Each uint32_t packs four 8-bit pixels; *ps1, *ps0, *qs0, *qs1 are the two
 * pixels on each side of the edge and are updated in place.  mask/hev are
 * packed per-byte flags (presumably 0x00/0xFF per pixel, produced by the
 * caller's mask computation -- TODO confirm against the call sites), so the
 * filter output is AND-ed away for pixels that should not be filtered.
 *
 * Pixels are first biased by XOR 0x80 (unsigned -> signed representation),
 * then each word is split into "left" and "right" halfword pairs so the
 * saturating .ph instructions have 16 bits of headroom per pixel.
 */
static INLINE void filter_dspr2(uint32_t mask, uint32_t hev,
                                uint32_t *ps1, uint32_t *ps0,
                                uint32_t *qs0, uint32_t *qs1) {
  int32_t vpx_filter_l, vpx_filter_r;
  int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
  int32_t subr_r, subr_l;
  uint32_t t1, t2, HWM, t3;
  uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
  int32_t vps1, vps0, vqs0, vqs1;
  int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
  uint32_t N128;

  N128 = 0x80808080;  /* sign-bias toggle for all four bytes */
  t1 = 0x03000300;    /* +3 in the high byte of each halfword */
  t2 = 0x04000400;    /* +4 in the high byte of each halfword */
  t3 = 0x01000100;    /* +1 in the high byte of each halfword */
  HWM = 0xFF00FF00;   /* keeps only the high byte of each halfword */

  /* convert unsigned pixels to a signed representation */
  vps0 = (*ps0) ^ N128;
  vps1 = (*ps1) ^ N128;
  vqs0 = (*qs0) ^ N128;
  vqs1 = (*qs1) ^ N128;

  /* use halfword pairs instead quad-bytes because of accuracy:
     each pixel is placed in the high byte of a halfword so the saturating
     .ph arithmetic below clamps exactly at the 8-bit signed range */
  vps0_l = vps0 & HWM;
  vps0_r = vps0 << 8;
  vps0_r = vps0_r & HWM;

  vps1_l = vps1 & HWM;
  vps1_r = vps1 << 8;
  vps1_r = vps1_r & HWM;

  vqs0_l = vqs0 & HWM;
  vqs0_r = vqs0 << 8;
  vqs0_r = vqs0_r & HWM;

  vqs1_l = vqs1 & HWM;
  vqs1_r = vqs1 << 8;
  vqs1_r = vqs1_r & HWM;

  /* split the packed flag words the same way as the pixels */
  mask_l = mask & HWM;
  mask_r = mask << 8;
  mask_r = mask_r & HWM;

  hev_l = hev & HWM;
  hev_r = hev << 8;
  hev_r = hev_r & HWM;

  __asm__ __volatile__ (
      /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
      "subq_s.ph    %[vpx_filter_l],    %[vps1_l],          %[vqs1_l]   \n\t"
      "subq_s.ph    %[vpx_filter_r],    %[vps1_r],          %[vqs1_r]   \n\t"

      /* qs0 - ps0 */
      "subq_s.ph    %[subr_l],          %[vqs0_l],          %[vps0_l]   \n\t"
      "subq_s.ph    %[subr_r],          %[vqs0_r],          %[vps0_r]   \n\t"

      /* vpx_filter &= hev; */
      "and          %[vpx_filter_l],    %[vpx_filter_l],    %[hev_l]    \n\t"
      "and          %[vpx_filter_r],    %[vpx_filter_r],    %[hev_r]    \n\t"

      /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0));
         done as three saturating adds; the xor instructions computing
         invhev (= ~hev within the HWM lanes) are interleaved for free */
      "addq_s.ph    %[vpx_filter_l],    %[vpx_filter_l],    %[subr_l]   \n\t"
      "addq_s.ph    %[vpx_filter_r],    %[vpx_filter_r],    %[subr_r]   \n\t"
      "xor          %[invhev_l],        %[hev_l],           %[HWM]      \n\t"
      "addq_s.ph    %[vpx_filter_l],    %[vpx_filter_l],    %[subr_l]   \n\t"
      "addq_s.ph    %[vpx_filter_r],    %[vpx_filter_r],    %[subr_r]   \n\t"
      "xor          %[invhev_r],        %[hev_r],           %[HWM]      \n\t"
      "addq_s.ph    %[vpx_filter_l],    %[vpx_filter_l],    %[subr_l]   \n\t"
      "addq_s.ph    %[vpx_filter_r],    %[vpx_filter_r],    %[subr_r]   \n\t"

      /* vpx_filter &= mask; */
      "and          %[vpx_filter_l],    %[vpx_filter_l],    %[mask_l]   \n\t"
      "and          %[vpx_filter_r],    %[vpx_filter_r],    %[mask_r]   \n\t"

      : [vpx_filter_l] "=&r" (vpx_filter_l),
        [vpx_filter_r] "=&r" (vpx_filter_r),
        [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
        [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
      : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
        [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r),
        [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r),
        [mask_l] "r" (mask_l), [mask_r] "r" (mask_r),
        [hev_l] "r" (hev_l), [hev_r] "r" (hev_r),
        [HWM] "r" (HWM)
  );

  /* save bottom 3 bits so that we round one side +4 and the other +3 */
  __asm__ __volatile__ (
      /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3;
         (note: t2 holds +4 per halfword) */
      "addq_s.ph    %[Filter1_l],    %[vpx_filter_l],    %[t2]           \n\t"
      "addq_s.ph    %[Filter1_r],    %[vpx_filter_r],    %[t2]           \n\t"

      /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3;
         (note: t1 holds +3 per halfword) */
      "addq_s.ph    %[Filter2_l],    %[vpx_filter_l],    %[t1]           \n\t"
      "addq_s.ph    %[Filter2_r],    %[vpx_filter_r],    %[t1]           \n\t"
      "shra.ph      %[Filter1_r],    %[Filter1_r],       3               \n\t"
      "shra.ph      %[Filter1_l],    %[Filter1_l],       3               \n\t"

      "shra.ph      %[Filter2_l],    %[Filter2_l],       3               \n\t"
      "shra.ph      %[Filter2_r],    %[Filter2_r],       3               \n\t"

      /* re-isolate the pixel byte after the arithmetic shift */
      "and          %[Filter1_l],    %[Filter1_l],       %[HWM]          \n\t"
      "and          %[Filter1_r],    %[Filter1_r],       %[HWM]          \n\t"

      /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */
      "addq_s.ph    %[vps0_l],       %[vps0_l],          %[Filter2_l]    \n\t"
      "addq_s.ph    %[vps0_r],       %[vps0_r],          %[Filter2_r]    \n\t"

      /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */
      "subq_s.ph    %[vqs0_l],       %[vqs0_l],          %[Filter1_l]    \n\t"
      "subq_s.ph    %[vqs0_r],       %[vqs0_r],          %[Filter1_r]    \n\t"

      : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r),
        [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r),
        [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
        [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
      : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
        [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
  );

  __asm__ __volatile__ (
      /* (vpx_filter += 1) >>= 1
         addqh.ph computes (a + b) >> 1 per halfword, so adding t3 (= +1)
         yields the rounded halving in a single instruction */
      "addqh.ph    %[Filter1_l],    %[Filter1_l],    %[t3]           \n\t"
      "addqh.ph    %[Filter1_r],    %[Filter1_r],    %[t3]           \n\t"

      /* vpx_filter &= ~hev; outer pixels only move when hev is clear */
      "and         %[Filter1_l],    %[Filter1_l],    %[invhev_l]     \n\t"
      "and         %[Filter1_r],    %[Filter1_r],    %[invhev_r]     \n\t"

      /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
      "addq_s.ph   %[vps1_l],       %[vps1_l],       %[Filter1_l]    \n\t"
      "addq_s.ph   %[vps1_r],       %[vps1_r],       %[Filter1_r]    \n\t"

      /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
      "subq_s.ph   %[vqs1_l],       %[vqs1_l],       %[Filter1_l]    \n\t"
      "subq_s.ph   %[vqs1_r],       %[vqs1_r],       %[Filter1_r]    \n\t"

      : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r),
        [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r),
        [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r)
      : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r)
  );

  /* Create quad-bytes from halfword pairs */
  vqs0_l = vqs0_l & HWM;
  vqs1_l = vqs1_l & HWM;
  vps0_l = vps0_l & HWM;
  vps1_l = vps1_l & HWM;

  __asm__ __volatile__ (
      /* logical shift: move the "right" pixels back to the low bytes */
      "shrl.ph    %[vqs0_r],    %[vqs0_r],    8    \n\t"
      "shrl.ph    %[vps0_r],    %[vps0_r],    8    \n\t"
      "shrl.ph    %[vqs1_r],    %[vqs1_r],    8    \n\t"
      "shrl.ph    %[vps1_r],    %[vps1_r],    8    \n\t"

      : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r),
        [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r)
      :
  );

  vqs0 = vqs0_l | vqs0_r;
  vqs1 = vqs1_l | vqs1_r;
  vps0 = vps0_l | vps0_r;
  vps1 = vps1_l | vps1_r;

  /* undo the sign bias and store the filtered pixels */
  *ps0 = vps0 ^ N128;
  *ps1 = vps1 ^ N128;
  *qs0 = vqs0 ^ N128;
  *qs1 = vqs1 ^ N128;
}

/*
 * Same 4-tap filter as filter_dspr2() above, but the input pixels are taken
 * by value and the results are written to separate outputs (*p1_f0..*q1_f0)
 * instead of updating the inputs in place; used when the caller still needs
 * the unfiltered pixels (e.g. to blend with a flat-filter result).
 * The algorithm is otherwise identical -- see filter_dspr2() for details.
 */
static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev,
                                 uint32_t ps1, uint32_t ps0,
                                 uint32_t qs0, uint32_t qs1,
                                 uint32_t *p1_f0, uint32_t *p0_f0,
                                 uint32_t *q0_f0, uint32_t *q1_f0) {
  int32_t vpx_filter_l, vpx_filter_r;
  int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
  int32_t subr_r, subr_l;
  uint32_t t1, t2, HWM, t3;
  uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
  int32_t vps1, vps0, vqs0, vqs1;
  int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
  uint32_t N128;

  N128 = 0x80808080;  /* sign-bias toggle for all four bytes */
  t1 = 0x03000300;    /* +3 in the high byte of each halfword */
  t2 = 0x04000400;    /* +4 in the high byte of each halfword */
  t3 = 0x01000100;    /* +1 in the high byte of each halfword */
  HWM = 0xFF00FF00;   /* keeps only the high byte of each halfword */

  /* convert unsigned pixels to a signed representation */
  vps0 = (ps0) ^ N128;
  vps1 = (ps1) ^ N128;
  vqs0 = (qs0) ^ N128;
  vqs1 = (qs1) ^ N128;

  /* use halfword pairs instead quad-bytes because of accuracy */
  vps0_l = vps0 & HWM;
  vps0_r = vps0 << 8;
  vps0_r = vps0_r & HWM;

  vps1_l = vps1 & HWM;
  vps1_r = vps1 << 8;
  vps1_r = vps1_r & HWM;

  vqs0_l = vqs0 & HWM;
  vqs0_r = vqs0 << 8;
  vqs0_r = vqs0_r & HWM;

  vqs1_l = vqs1 & HWM;
  vqs1_r = vqs1 << 8;
  vqs1_r = vqs1_r & HWM;

  mask_l = mask & HWM;
  mask_r = mask << 8;
  mask_r = mask_r & HWM;

  hev_l = hev & HWM;
  hev_r = hev << 8;
  hev_r = hev_r & HWM;

  __asm__ __volatile__ (
      /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
      "subq_s.ph    %[vpx_filter_l],    %[vps1_l],          %[vqs1_l]   \n\t"
      "subq_s.ph    %[vpx_filter_r],    %[vps1_r],          %[vqs1_r]   \n\t"

      /* qs0 - ps0 */
      "subq_s.ph    %[subr_l],          %[vqs0_l],          %[vps0_l]   \n\t"
      "subq_s.ph    %[subr_r],          %[vqs0_r],          %[vps0_r]   \n\t"

      /* vpx_filter &= hev; */
      "and          %[vpx_filter_l],    %[vpx_filter_l],    %[hev_l]    \n\t"
      "and          %[vpx_filter_r],    %[vpx_filter_r],    %[hev_r]    \n\t"

      /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
      "addq_s.ph    %[vpx_filter_l],    %[vpx_filter_l],    %[subr_l]   \n\t"
      "addq_s.ph    %[vpx_filter_r],    %[vpx_filter_r],    %[subr_r]   \n\t"
      "xor          %[invhev_l],        %[hev_l],           %[HWM]      \n\t"
      "addq_s.ph    %[vpx_filter_l],    %[vpx_filter_l],    %[subr_l]   \n\t"
      "addq_s.ph    %[vpx_filter_r],    %[vpx_filter_r],    %[subr_r]   \n\t"
      "xor          %[invhev_r],        %[hev_r],           %[HWM]      \n\t"
      "addq_s.ph    %[vpx_filter_l],    %[vpx_filter_l],    %[subr_l]   \n\t"
      "addq_s.ph    %[vpx_filter_r],    %[vpx_filter_r],    %[subr_r]   \n\t"

      /* vpx_filter &= mask; */
      "and          %[vpx_filter_l],    %[vpx_filter_l],    %[mask_l]   \n\t"
      "and          %[vpx_filter_r],    %[vpx_filter_r],    %[mask_r]   \n\t"

      : [vpx_filter_l] "=&r" (vpx_filter_l),
        [vpx_filter_r] "=&r" (vpx_filter_r),
        [subr_l] "=&r" (subr_l), [subr_r] "=&r" (subr_r),
        [invhev_l] "=&r" (invhev_l), [invhev_r] "=&r" (invhev_r)
      : [vps0_l] "r" (vps0_l), [vps0_r] "r" (vps0_r), [vps1_l] "r" (vps1_l),
        [vps1_r] "r" (vps1_r), [vqs0_l] "r" (vqs0_l), [vqs0_r] "r" (vqs0_r),
        [vqs1_l] "r" (vqs1_l), [vqs1_r] "r" (vqs1_r),
        [mask_l] "r" (mask_l), [mask_r] "r" (mask_r),
        [hev_l] "r" (hev_l), [hev_r] "r" (hev_r), [HWM] "r" (HWM)
  );

  /* save bottom 3 bits so that we round one side +4 and the other +3 */
  __asm__ __volatile__ (
      /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3;
         (note: t2 holds +4 per halfword) */
      "addq_s.ph    %[Filter1_l],    %[vpx_filter_l],    %[t2]           \n\t"
      "addq_s.ph    %[Filter1_r],    %[vpx_filter_r],    %[t2]           \n\t"

      /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3;
         (note: t1 holds +3 per halfword) */
      "addq_s.ph    %[Filter2_l],    %[vpx_filter_l],    %[t1]           \n\t"
      "addq_s.ph    %[Filter2_r],    %[vpx_filter_r],    %[t1]           \n\t"
      "shra.ph      %[Filter1_r],    %[Filter1_r],       3               \n\t"
      "shra.ph      %[Filter1_l],    %[Filter1_l],       3               \n\t"

      "shra.ph      %[Filter2_l],    %[Filter2_l],       3               \n\t"
      "shra.ph      %[Filter2_r],    %[Filter2_r],       3               \n\t"

      "and          %[Filter1_l],    %[Filter1_l],       %[HWM]          \n\t"
      "and          %[Filter1_r],    %[Filter1_r],       %[HWM]          \n\t"

      /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */
      "addq_s.ph    %[vps0_l],       %[vps0_l],          %[Filter2_l]    \n\t"
      "addq_s.ph    %[vps0_r],       %[vps0_r],          %[Filter2_r]    \n\t"

      /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */
      "subq_s.ph    %[vqs0_l],       %[vqs0_l],          %[Filter1_l]    \n\t"
      "subq_s.ph    %[vqs0_r],       %[vqs0_r],          %[Filter1_r]    \n\t"

      : [Filter1_l] "=&r" (Filter1_l), [Filter1_r] "=&r" (Filter1_r),
        [Filter2_l] "=&r" (Filter2_l), [Filter2_r] "=&r" (Filter2_r),
        [vps0_l] "+r" (vps0_l), [vps0_r] "+r" (vps0_r),
        [vqs0_l] "+r" (vqs0_l), [vqs0_r] "+r" (vqs0_r)
      : [t1] "r" (t1), [t2] "r" (t2), [HWM] "r" (HWM),
        [vpx_filter_l] "r" (vpx_filter_l), [vpx_filter_r] "r" (vpx_filter_r)
  );

  __asm__ __volatile__ (
      /* (vpx_filter += 1) >>= 1 -- addqh.ph is a halving add */
      "addqh.ph    %[Filter1_l],    %[Filter1_l],    %[t3]           \n\t"
      "addqh.ph    %[Filter1_r],    %[Filter1_r],    %[t3]           \n\t"

      /* vpx_filter &= ~hev; */
      "and         %[Filter1_l],    %[Filter1_l],    %[invhev_l]     \n\t"
      "and         %[Filter1_r],    %[Filter1_r],    %[invhev_r]     \n\t"

      /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
      "addq_s.ph   %[vps1_l],       %[vps1_l],       %[Filter1_l]    \n\t"
      "addq_s.ph   %[vps1_r],       %[vps1_r],       %[Filter1_r]    \n\t"

      /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
      "subq_s.ph   %[vqs1_l],       %[vqs1_l],       %[Filter1_l]    \n\t"
      "subq_s.ph   %[vqs1_r],       %[vqs1_r],       %[Filter1_r]    \n\t"

      : [Filter1_l] "+r" (Filter1_l), [Filter1_r] "+r" (Filter1_r),
        [vps1_l] "+r" (vps1_l), [vps1_r] "+r" (vps1_r),
        [vqs1_l] "+r" (vqs1_l), [vqs1_r] "+r" (vqs1_r)
      : [t3] "r" (t3), [invhev_l] "r" (invhev_l), [invhev_r] "r" (invhev_r)
  );

  /* Create quad-bytes from halfword pairs */
  vqs0_l = vqs0_l & HWM;
  vqs1_l = vqs1_l & HWM;
  vps0_l = vps0_l & HWM;
  vps1_l = vps1_l & HWM;

  __asm__ __volatile__ (
      "shrl.ph    %[vqs0_r],    %[vqs0_r],    8    \n\t"
      "shrl.ph    %[vps0_r],    %[vps0_r],    8    \n\t"
      "shrl.ph    %[vqs1_r],    %[vqs1_r],    8    \n\t"
      "shrl.ph    %[vps1_r],    %[vps1_r],    8    \n\t"

      : [vps1_r] "+r" (vps1_r), [vqs1_r] "+r" (vqs1_r),
        [vps0_r] "+r" (vps0_r), [vqs0_r] "+r" (vqs0_r)
      :
  );

  vqs0 = vqs0_l | vqs0_r;
  vqs1 = vqs1_l | vqs1_r;
  vps0 = vps0_l | vps0_r;
  vps1 = vps1_l | vps1_r;

  /* undo the sign bias and write to the separate filter-0 outputs */
  *p0_f0 = vps0 ^ N128;
  *p1_f0 = vps1 ^ N128;
  *q0_f0 = vqs0 ^ N128;
  *q1_f0 = vqs1 ^ N128;
}

/*
 * 8-tap "flat" filter applied across a 4-wide edge, in-place variant.
 * Each output is an 8-pixel rounded average computed with plain (non-
 * saturating) halfword adds; the shared sum add_p210_q012 =
 * p2+p1+p0+q0+q1+q2+4 is built once and the per-output terms are added/
 * subtracted around it.  Assumes the halfword lanes cannot carry into each
 * other here -- each lane holds a single pixel value, so the eight-term sum
 * stays well below 16 bits.
 */
static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2,
                                  uint32_t *op1, uint32_t *op0,
                                  uint32_t *oq0, uint32_t *oq1,
                                  uint32_t *oq2, uint32_t *oq3) {
  /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
  const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
  const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
  uint32_t res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2;
  uint32_t tmp;
  uint32_t add_p210_q012;
  uint32_t u32Four = 0x00040004;  /* rounding term: +4 per halfword lane */

  /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3)  1 */
  /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3)  2 */
  /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3)  3 */
  /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3)  4 */
  /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3)  5 */
  /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3)  6 */

  __asm__ __volatile__ (
      /* add_p210_q012 = p2 + p1 + p0 + q0 + q1 + q2 + 4 (shared by all) */
      "addu.ph    %[add_p210_q012],    %[p2],                %[p1]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[p0]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[q0]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[q1]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[q2]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[u32Four]  \n\t"

      /* formulas 1 & 2 interleaved; shll/addu builds 3*p3 and 2*p3 */
      "shll.ph    %[tmp],              %[p3],                1           \n\t"
      "addu.ph    %[res_op2],          %[tmp],               %[p3]       \n\t"
      "addu.ph    %[res_op1],          %[p3],                %[p3]       \n\t"
      "addu.ph    %[res_op2],          %[res_op2],           %[p2]       \n\t"
      "addu.ph    %[res_op1],          %[res_op1],           %[p1]       \n\t"
      "addu.ph    %[res_op2],          %[res_op2],           %[add_p210_q012] \n\t"
      "addu.ph    %[res_op1],          %[res_op1],           %[add_p210_q012] \n\t"
      "subu.ph    %[res_op2],          %[res_op2],           %[q1]       \n\t"
      "subu.ph    %[res_op1],          %[res_op1],           %[q2]       \n\t"
      "subu.ph    %[res_op2],          %[res_op2],           %[q2]       \n\t"
      "shrl.ph    %[res_op1],          %[res_op1],           3           \n\t"
      "shrl.ph    %[res_op2],          %[res_op2],           3           \n\t"
      /* formulas 3 & 4 */
      "addu.ph    %[res_op0],          %[p3],                %[p0]       \n\t"
      "addu.ph    %[res_oq0],          %[q0],                %[q3]       \n\t"
      "addu.ph    %[res_op0],          %[res_op0],           %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq0],          %[res_oq0],           %[add_p210_q012] \n\t"
      /* formulas 5 & 6 (mirror images of 2 & 1 on the q side) */
      "addu.ph    %[res_oq1],          %[q3],                %[q3]       \n\t"
      "shll.ph    %[tmp],              %[q3],                1           \n\t"
      "addu.ph    %[res_oq1],          %[res_oq1],           %[q1]       \n\t"
      "addu.ph    %[res_oq2],          %[tmp],               %[q3]       \n\t"
      "addu.ph    %[res_oq1],          %[res_oq1],           %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq2],          %[res_oq2],           %[add_p210_q012] \n\t"
      "subu.ph    %[res_oq1],          %[res_oq1],           %[p2]       \n\t"
      "addu.ph    %[res_oq2],          %[res_oq2],           %[q2]       \n\t"
      "shrl.ph    %[res_oq1],          %[res_oq1],           3           \n\t"
      "subu.ph    %[res_oq2],          %[res_oq2],           %[p2]       \n\t"
      "shrl.ph    %[res_oq0],          %[res_oq0],           3           \n\t"
      "subu.ph    %[res_oq2],          %[res_oq2],           %[p1]       \n\t"
      "shrl.ph    %[res_op0],          %[res_op0],           3           \n\t"
      "shrl.ph    %[res_oq2],          %[res_oq2],           3           \n\t"

      : [add_p210_q012] "=&r" (add_p210_q012),
        [tmp] "=&r" (tmp), [res_op2] "=&r" (res_op2),
        [res_op1] "=&r" (res_op1), [res_op0] "=&r" (res_op0),
        [res_oq0] "=&r" (res_oq0), [res_oq1] "=&r" (res_oq1),
        [res_oq2] "=&r" (res_oq2)
      : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [q1] "r" (q1),
        [p2] "r" (p2), [q2] "r" (q2), [p3] "r" (p3), [q3] "r" (q3),
        [u32Four] "r" (u32Four)
  );

  *op2 = res_op2;
  *op1 = res_op1;
  *op0 = res_op0;
  *oq0 = res_oq0;
  *oq1 = res_oq1;
  *oq2 = res_oq2;
}

/*
 * Same 8-tap flat filter as mbfilter_dspr2(), but with by-value inputs and
 * separate outputs (*op2_f1..*oq2_f1), for callers that must keep the
 * original pixels.  The assembly is identical to mbfilter_dspr2().
 */
static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2,
                                   uint32_t p1, uint32_t p0,
                                   uint32_t q0, uint32_t q1,
                                   uint32_t q2, uint32_t q3,
                                   uint32_t *op2_f1,
                                   uint32_t *op1_f1, uint32_t *op0_f1,
                                   uint32_t *oq0_f1, uint32_t *oq1_f1,
                                   uint32_t *oq2_f1) {
  /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
  uint32_t res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2;
  uint32_t tmp;
  uint32_t add_p210_q012;
  uint32_t u32Four = 0x00040004;  /* rounding term: +4 per halfword lane */

  /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3)  1 */
  /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3)  2 */
  /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3)  3 */
  /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3)  4 */
  /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3)  5 */
  /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3)  6 */

  __asm__ __volatile__ (
      /* add_p210_q012 = p2 + p1 + p0 + q0 + q1 + q2 + 4 (shared by all) */
      "addu.ph    %[add_p210_q012],    %[p2],                %[p1]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[p0]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[q0]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[q1]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[q2]       \n\t"
      "addu.ph    %[add_p210_q012],    %[add_p210_q012],     %[u32Four]  \n\t"

      "shll.ph    %[tmp],              %[p3],                1           \n\t"
      "addu.ph    %[res_op2],          %[tmp],               %[p3]       \n\t"
      "addu.ph    %[res_op1],          %[p3],                %[p3]       \n\t"
      "addu.ph    %[res_op2],          %[res_op2],           %[p2]       \n\t"
      "addu.ph    %[res_op1],          %[res_op1],           %[p1]       \n\t"
      "addu.ph    %[res_op2],          %[res_op2],           %[add_p210_q012] \n\t"
      "addu.ph    %[res_op1],          %[res_op1],           %[add_p210_q012] \n\t"
      "subu.ph    %[res_op2],          %[res_op2],           %[q1]       \n\t"
      "subu.ph    %[res_op1],          %[res_op1],           %[q2]       \n\t"
      "subu.ph    %[res_op2],          %[res_op2],           %[q2]       \n\t"
      "shrl.ph    %[res_op1],          %[res_op1],           3           \n\t"
      "shrl.ph    %[res_op2],          %[res_op2],           3           \n\t"
      "addu.ph    %[res_op0],          %[p3],                %[p0]       \n\t"
      "addu.ph    %[res_oq0],          %[q0],                %[q3]       \n\t"
      "addu.ph    %[res_op0],          %[res_op0],           %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq0],          %[res_oq0],           %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq1],          %[q3],                %[q3]       \n\t"
      "shll.ph    %[tmp],              %[q3],                1           \n\t"
      "addu.ph    %[res_oq1],          %[res_oq1],           %[q1]       \n\t"
      "addu.ph    %[res_oq2],          %[tmp],               %[q3]       \n\t"
      "addu.ph    %[res_oq1],          %[res_oq1],           %[add_p210_q012] \n\t"
      "addu.ph    %[res_oq2],          %[res_oq2],           %[add_p210_q012] \n\t"
      "subu.ph    %[res_oq1],          %[res_oq1],           %[p2]       \n\t"
      "addu.ph    %[res_oq2],          %[res_oq2],           %[q2]       \n\t"
      "shrl.ph    %[res_oq1],          %[res_oq1],           3           \n\t"
      "subu.ph    %[res_oq2],          %[res_oq2],           %[p2]       \n\t"
      "shrl.ph    %[res_oq0],          %[res_oq0],           3           \n\t"
      "subu.ph    %[res_oq2],          %[res_oq2],           %[p1]       \n\t"
      "shrl.ph    %[res_op0],          %[res_op0],           3           \n\t"
      "shrl.ph    %[res_oq2],          %[res_oq2],           3           \n\t"

      : [add_p210_q012] "=&r" (add_p210_q012), [tmp] "=&r" (tmp),
        [res_op2] "=&r" (res_op2), [res_op1] "=&r" (res_op1),
        [res_op0] "=&r" (res_op0), [res_oq0] "=&r" (res_oq0),
        [res_oq1] "=&r" (res_oq1), [res_oq2] "=&r" (res_oq2)
      : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [q1] "r" (q1),
        [p2] "r" (p2), [q2] "r" (q2), [p3] "r" (p3), [q3] "r" (q3),
        [u32Four] "r" (u32Four)
  );

  *op2_f1 = res_op2;
  *op1_f1 = res_op1;
  *op0_f1 = res_op0;
  *oq0_f1 = res_oq0;
  *oq1_f1 = res_oq1;
  *oq2_f1 = res_oq2;
}

/*
 * 16-wide (15-tap) flat filter for the widest loop-filter case; updates all
 * of *op6..*oq6 in place (*op7/*oq7 are inputs only).  A single shared sum
 * of p6..q6 plus the rounding constant 8 is built once; each output then
 * adds its extra multiples of p7 or q7 and the double-weighted center tap,
 * and subtracts the terms its formula does not include, before the >> 4.
 */
static INLINE void wide_mbfilter_dspr2(uint32_t *op7, uint32_t *op6,
                                       uint32_t *op5, uint32_t *op4,
                                       uint32_t *op3, uint32_t *op2,
                                       uint32_t *op1, uint32_t *op0,
                                       uint32_t *oq0, uint32_t *oq1,
                                       uint32_t *oq2, uint32_t *oq3,
                                       uint32_t *oq4, uint32_t *oq5,
                                       uint32_t *oq6, uint32_t *oq7) {
  const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
  const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
  const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
  const uint32_t q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7;
  uint32_t res_op6, res_op5, res_op4, res_op3, res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2, res_oq3, res_oq4, res_oq5, res_oq6;
  uint32_t tmp;
  uint32_t add_p6toq6;
  uint32_t u32Eight = 0x00080008;  /* rounding term: +8 per halfword lane */

  __asm__ __volatile__ (
      /* addition of p6,p5,p4,p3,p2,p1,p0,q0,q1,q2,q3,q4,q5,q6
         which is used most of the time */
      "addu.ph    %[add_p6toq6],    %[p6],             %[p5]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[p4]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[p3]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[p2]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[p1]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[p0]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[q0]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[q1]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[q2]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[q3]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[q4]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[q5]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[q6]        \n\t"
      "addu.ph    %[add_p6toq6],    %[add_p6toq6],     %[u32Eight]  \n\t"

      : [add_p6toq6] "=&r" (add_p6toq6)
      : [p6] "r" (p6), [p5] "r" (p5), [p4] "r" (p4),
        [p3] "r" (p3), [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0),
        [q0] "r" (q0), [q1] "r" (q1), [q2] "r" (q2), [q3] "r" (q3),
        [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6),
        [u32Eight] "r" (u32Eight)
  );

  __asm__ __volatile__ (
      /* *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 +
                                   p3 + p2 + p1 + p0 + q0, 4)
         7*p7 is built as (p7 << 3) - p7 */
      "shll.ph    %[tmp],        %[p7],         3             \n\t"
      "subu.ph    %[res_op6],    %[tmp],        %[p7]         \n\t"
      "addu.ph    %[res_op6],    %[res_op6],    %[p6]         \n\t"
      "addu.ph    %[res_op6],    %[res_op6],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_op6],    %[res_op6],    %[q1]         \n\t"
      "subu.ph    %[res_op6],    %[res_op6],    %[q2]         \n\t"
      "subu.ph    %[res_op6],    %[res_op6],    %[q3]         \n\t"
      "subu.ph    %[res_op6],    %[res_op6],    %[q4]         \n\t"
      "subu.ph    %[res_op6],    %[res_op6],    %[q5]         \n\t"
      "subu.ph    %[res_op6],    %[res_op6],    %[q6]         \n\t"
      "shrl.ph    %[res_op6],    %[res_op6],    4             \n\t"

      /* *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 +
                                   p2 + p1 + p0 + q0 + q1, 4) */
      "shll.ph    %[tmp],        %[p7],         2             \n\t"
      "addu.ph    %[res_op5],    %[tmp],        %[p7]         \n\t"
      "addu.ph    %[res_op5],    %[res_op5],    %[p7]         \n\t"
      "addu.ph    %[res_op5],    %[res_op5],    %[p5]         \n\t"
      "addu.ph    %[res_op5],    %[res_op5],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_op5],    %[res_op5],    %[q2]         \n\t"
      "subu.ph    %[res_op5],    %[res_op5],    %[q3]         \n\t"
      "subu.ph    %[res_op5],    %[res_op5],    %[q4]         \n\t"
      "subu.ph    %[res_op5],    %[res_op5],    %[q5]         \n\t"
      "subu.ph    %[res_op5],    %[res_op5],    %[q6]         \n\t"
      "shrl.ph    %[res_op5],    %[res_op5],    4             \n\t"

      /* *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 +
                                   p1 + p0 + q0 + q1 + q2, 4) */
      "shll.ph    %[tmp],        %[p7],         2             \n\t"
      "addu.ph    %[res_op4],    %[tmp],        %[p7]         \n\t"
      "addu.ph    %[res_op4],    %[res_op4],    %[p4]         \n\t"
      "addu.ph    %[res_op4],    %[res_op4],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_op4],    %[res_op4],    %[q3]         \n\t"
      "subu.ph    %[res_op4],    %[res_op4],    %[q4]         \n\t"
      "subu.ph    %[res_op4],    %[res_op4],    %[q5]         \n\t"
      "subu.ph    %[res_op4],    %[res_op4],    %[q6]         \n\t"
      "shrl.ph    %[res_op4],    %[res_op4],    4             \n\t"

      /* *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 +
                                   p1 + p0 + q0 + q1 + q2 + q3, 4) */
      "shll.ph    %[tmp],        %[p7],         2             \n\t"
      "addu.ph    %[res_op3],    %[tmp],        %[p3]         \n\t"
      "addu.ph    %[res_op3],    %[res_op3],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_op3],    %[res_op3],    %[q4]         \n\t"
      "subu.ph    %[res_op3],    %[res_op3],    %[q5]         \n\t"
      "subu.ph    %[res_op3],    %[res_op3],    %[q6]         \n\t"
      "shrl.ph    %[res_op3],    %[res_op3],    4             \n\t"

      /* *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 +
                                   p0 + q0 + q1 + q2 + q3 + q4, 4) */
      "shll.ph    %[tmp],        %[p7],         1             \n\t"
      "addu.ph    %[res_op2],    %[tmp],        %[p7]         \n\t"
      "addu.ph    %[res_op2],    %[res_op2],    %[p2]         \n\t"
      "addu.ph    %[res_op2],    %[res_op2],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_op2],    %[res_op2],    %[q5]         \n\t"
      "subu.ph    %[res_op2],    %[res_op2],    %[q6]         \n\t"
      "shrl.ph    %[res_op2],    %[res_op2],    4             \n\t"

      /* *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
                                   p0 + q0 + q1 + q2 + q3 + q4 + q5, 4); */
      "shll.ph    %[tmp],        %[p7],         1             \n\t"
      "addu.ph    %[res_op1],    %[tmp],        %[p1]         \n\t"
      "addu.ph    %[res_op1],    %[res_op1],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_op1],    %[res_op1],    %[q6]         \n\t"
      "shrl.ph    %[res_op1],    %[res_op1],    4             \n\t"

      /* *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
                                   q0 + q1 + q2 + q3 + q4 + q5 + q6, 4) */
      "addu.ph    %[res_op0],    %[p7],         %[p0]         \n\t"
      "addu.ph    %[res_op0],    %[res_op0],    %[add_p6toq6] \n\t"
      "shrl.ph    %[res_op0],    %[res_op0],    4             \n\t"

      : [res_op6] "=&r" (res_op6), [res_op5] "=&r" (res_op5),
        [res_op4] "=&r" (res_op4), [res_op3] "=&r" (res_op3),
        [res_op2] "=&r" (res_op2), [res_op1] "=&r" (res_op1),
        [res_op0] "=&r" (res_op0), [tmp] "=&r" (tmp)
      : [p7] "r" (p7), [p6] "r" (p6), [p5] "r" (p5), [p4] "r" (p4),
        [p3] "r" (p3), [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0),
        [q2] "r" (q2), [q1] "r" (q1),
        [q3] "r" (q3), [q4] "r" (q4), [q5] "r" (q5), [q6] "r" (q6),
        [add_p6toq6] "r" (add_p6toq6)
  );

  *op6 = res_op6;
  *op5 = res_op5;
  *op4 = res_op4;
  *op3 = res_op3;
  *op2 = res_op2;
  *op1 = res_op1;
  *op0 = res_op0;

  __asm__ __volatile__ (
      /* *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
                                   q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); */
      "addu.ph    %[res_oq0],    %[q7],         %[q0]         \n\t"
      "addu.ph    %[res_oq0],    %[res_oq0],    %[add_p6toq6] \n\t"
      "shrl.ph    %[res_oq0],    %[res_oq0],    4             \n\t"

      /* *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
                                   q2 + q3 + q4 + q5 + q6 + q7 * 2, 4) */
      "shll.ph    %[tmp],        %[q7],         1             \n\t"
      "addu.ph    %[res_oq1],    %[tmp],        %[q1]         \n\t"
      "addu.ph    %[res_oq1],    %[res_oq1],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_oq1],    %[res_oq1],    %[p6]         \n\t"
      "shrl.ph    %[res_oq1],    %[res_oq1],    4             \n\t"

      /* *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
                                   q3 + q4 + q5 + q6 + q7 * 3, 4) */
      "shll.ph    %[tmp],        %[q7],         1             \n\t"
      "addu.ph    %[res_oq2],    %[tmp],        %[q7]         \n\t"
      "addu.ph    %[res_oq2],    %[res_oq2],    %[q2]         \n\t"
      "addu.ph    %[res_oq2],    %[res_oq2],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_oq2],    %[res_oq2],    %[p5]         \n\t"
      "subu.ph    %[res_oq2],    %[res_oq2],    %[p6]         \n\t"
      "shrl.ph    %[res_oq2],    %[res_oq2],    4             \n\t"

      /* *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + q0 + q1 + q2 +
                                   q3 * 2 + q4 + q5 + q6 + q7 * 4, 4) */
      "shll.ph    %[tmp],        %[q7],         2             \n\t"
      "addu.ph    %[res_oq3],    %[tmp],        %[q3]         \n\t"
      "addu.ph    %[res_oq3],    %[res_oq3],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_oq3],    %[res_oq3],    %[p4]         \n\t"
      "subu.ph    %[res_oq3],    %[res_oq3],    %[p5]         \n\t"
      "subu.ph    %[res_oq3],    %[res_oq3],    %[p6]         \n\t"
      "shrl.ph    %[res_oq3],    %[res_oq3],    4             \n\t"

      /* *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q1 + q2 + q3 +
                                   q4 * 2 + q5 + q6 + q7 * 5, 4) */
      "shll.ph    %[tmp],        %[q7],         2             \n\t"
      "addu.ph    %[res_oq4],    %[tmp],        %[q7]         \n\t"
      "addu.ph    %[res_oq4],    %[res_oq4],    %[q4]         \n\t"
      "addu.ph    %[res_oq4],    %[res_oq4],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_oq4],    %[res_oq4],    %[p3]         \n\t"
      "subu.ph    %[res_oq4],    %[res_oq4],    %[p4]         \n\t"
      "subu.ph    %[res_oq4],    %[res_oq4],    %[p5]         \n\t"
      "subu.ph    %[res_oq4],    %[res_oq4],    %[p6]         \n\t"
      "shrl.ph    %[res_oq4],    %[res_oq4],    4             \n\t"

      /* *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q2 + q3 + q4 +
                                   q5 * 2 + q6 + q7 * 6, 4) */
      "shll.ph    %[tmp],        %[q7],         2             \n\t"
      "addu.ph    %[res_oq5],    %[tmp],        %[q7]         \n\t"
      "addu.ph    %[res_oq5],    %[res_oq5],    %[q7]         \n\t"
      "addu.ph    %[res_oq5],    %[res_oq5],    %[q5]         \n\t"
      "addu.ph    %[res_oq5],    %[res_oq5],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_oq5],    %[res_oq5],    %[p2]         \n\t"
      "subu.ph    %[res_oq5],    %[res_oq5],    %[p3]         \n\t"
      "subu.ph    %[res_oq5],    %[res_oq5],    %[p4]         \n\t"
      "subu.ph    %[res_oq5],    %[res_oq5],    %[p5]         \n\t"
      "subu.ph    %[res_oq5],    %[res_oq5],    %[p6]         \n\t"
      "shrl.ph    %[res_oq5],    %[res_oq5],    4             \n\t"

      /* *oq6 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 +
                                   q4 + q5 + q6 * 2 + q7 * 7, 4) */
      "shll.ph    %[tmp],        %[q7],         3             \n\t"
      "subu.ph    %[res_oq6],    %[tmp],        %[q7]         \n\t"
      "addu.ph    %[res_oq6],    %[res_oq6],    %[q6]         \n\t"
      "addu.ph    %[res_oq6],    %[res_oq6],    %[add_p6toq6] \n\t"
      "subu.ph    %[res_oq6],    %[res_oq6],    %[p1]         \n\t"
      "subu.ph    %[res_oq6],    %[res_oq6],    %[p2]         \n\t"
      "subu.ph    %[res_oq6],    %[res_oq6],    %[p3]         \n\t"
      "subu.ph    %[res_oq6],    %[res_oq6],    %[p4]         \n\t"
      "subu.ph    %[res_oq6],    %[res_oq6],    %[p5]         \n\t"
      "subu.ph    %[res_oq6],    %[res_oq6],    %[p6]         \n\t"
      "shrl.ph    %[res_oq6],    %[res_oq6],    4             \n\t"

      : [res_oq6] "=&r" (res_oq6), [res_oq5] "=&r" (res_oq5),
        [res_oq4] "=&r" (res_oq4), [res_oq3] "=&r" (res_oq3),
        [res_oq2] "=&r" (res_oq2), [res_oq1] "=&r" (res_oq1),
        [res_oq0] "=&r" (res_oq0), [tmp] "=&r" (tmp)
      : [q7] "r" (q7), [q6] "r" (q6), [q5] "r" (q5), [q4] "r" (q4),
        [q3] "r" (q3), [q2] "r" (q2), [q1] "r" (q1), [q0] "r" (q0),
        [p1] "r" (p1), [p2] "r" (p2),
        [p3] "r" (p3), [p4] "r" (p4), [p5] "r" (p5), [p6] "r" (p6),
        [add_p6toq6] "r" (add_p6toq6)
  );

  *oq0 = res_oq0;
  *oq1 = res_oq1;
  *oq2 = res_oq2;
  *oq3 = res_oq3;
  *oq4 = res_oq4;
  *oq5 = res_oq5;
  *oq6 = res_oq6;
}
#endif  // #if HAVE_DSPR2

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_