/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_
#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_

#include <stdlib.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#ifdef __cplusplus
extern "C" {
#endif

#if HAVE_DSPR2
/* inputs & outputs are quad-byte vectors */
/* 4-tap loop filter applied to four pixels in parallel (one per byte of
 * each uint32_t).  Pixels are biased by 0x80 to a signed representation,
 * split into two halfword pairs (high/"_l" and low/"_r" bytes) so the
 * saturating .ph DSP instructions have 16 bits of headroom, filtered,
 * then recombined and un-biased in place through ps1/ps0/qs0/qs1. */
static INLINE void filter_dspr2(uint32_t mask, uint32_t hev, uint32_t *ps1,
                                uint32_t *ps0, uint32_t *qs0, uint32_t *qs1) {
  int32_t vpx_filter_l, vpx_filter_r;
  int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
  int32_t subr_r, subr_l;
  uint32_t t1, t2, HWM, t3;
  uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
  int32_t vps1, vps0, vqs0, vqs1;
  int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
  uint32_t N128;

  N128 = 0x80808080; /* 0x80 per byte: unsigned<->signed bias */
  t1 = 0x03000300;   /* +3 in the high byte of each halfword */
  t2 = 0x04000400;   /* +4 in the high byte of each halfword */
  t3 = 0x01000100;   /* +1 in the high byte of each halfword */
  HWM = 0xFF00FF00;  /* keeps only the high byte of each halfword */

  vps0 = (*ps0) ^ N128;
  vps1 = (*ps1) ^ N128;
  vqs0 = (*qs0) ^ N128;
  vqs1 = (*qs1) ^ N128;

  /* use halfword pairs instead of quad-bytes because of accuracy */
  vps0_l = vps0 & HWM;
  vps0_r = vps0 << 8;
  vps0_r = vps0_r & HWM;

  vps1_l = vps1 & HWM;
  vps1_r = vps1 << 8;
  vps1_r = vps1_r & HWM;

  vqs0_l = vqs0 & HWM;
  vqs0_r = vqs0 << 8;
  vqs0_r = vqs0_r & HWM;

  vqs1_l = vqs1 & HWM;
  vqs1_r = vqs1 << 8;
  vqs1_r = vqs1_r & HWM;

  mask_l = mask & HWM;
  mask_r = mask << 8;
  mask_r = mask_r & HWM;

  hev_l = hev & HWM;
  hev_r = hev << 8;
  hev_r = hev_r & HWM;

  __asm__ __volatile__(
      /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
      "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
      "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"

      /* qs0 - ps0 */
      "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
      "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"

      /* vpx_filter &= hev; */
      "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
      "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"

      /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
      /* the ~hev masks are computed in the same block to fill the pipeline */
      "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
      "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
      "xor %[invhev_l], %[hev_l], %[HWM] \n\t"
      "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
      "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
      "xor %[invhev_r], %[hev_r], %[HWM] \n\t"
      "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
      "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"

      /* vpx_filter &= mask; */
      "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
      "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"

      : [vpx_filter_l] "=&r"(vpx_filter_l), [vpx_filter_r] "=&r"(vpx_filter_r),
        [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r),
        [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r)
      : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l),
        [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r),
        [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l),
        [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r),
        [HWM] "r"(HWM));

  /* save bottom 3 bits so that we round one side +4 and the other +3 */
  __asm__ __volatile__(
      /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >> 3; (t2 = +4) */
      "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
      "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"

      /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >> 3; (t1 = +3) */
      "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
      "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
      "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
      "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"

      "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t"
      "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t"

      /* only Filter1 is masked here; the low bytes of the halfword results
         are discarded anyway when the quad-bytes are rebuilt below */
      "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t"
      "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t"

      /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */
      "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t"
      "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t"

      /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */
      "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t"
      "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t"

      : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r),
        [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r),
        [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l),
        [vqs0_r] "+r"(vqs0_r)
      : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM),
        [vpx_filter_l] "r"(vpx_filter_l), [vpx_filter_r] "r"(vpx_filter_r));

  __asm__ __volatile__(
      /* (vpx_filter += 1) >>= 1  (addqh.ph adds then halves) */
      "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
      "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"

      /* vpx_filter &= ~hev; */
      "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
      "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"

      /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
      "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
      "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"

      /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
      "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
      "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"

      : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r),
        [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l),
        [vqs1_r] "+r"(vqs1_r)
      : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r));

  /* Create quad-bytes from halfword pairs */
  vqs0_l = vqs0_l & HWM;
  vqs1_l = vqs1_l & HWM;
  vps0_l = vps0_l & HWM;
  vps1_l = vps1_l & HWM;

  __asm__ __volatile__(
      "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t"
      "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t"
      "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t"
      "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t"

      : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r),
        [vqs0_r] "+r"(vqs0_r)
      :);

  vqs0 = vqs0_l | vqs0_r;
  vqs1 = vqs1_l | vqs1_r;
  vps0 = vps0_l | vps0_r;
  vps1 = vps1_l | vps1_r;

  /* un-bias back to unsigned pixels */
  *ps0 = vps0 ^ N128;
  *ps1 = vps1 ^ N128;
  *qs0 = vqs0 ^ N128;
  *qs1 = vqs1 ^ N128;
}

/* Same 4-tap filter as filter_dspr2(), but the inputs are passed by value
 * and the filtered pixels are written to the separate *_f0 outputs, leaving
 * the originals untouched (used when a second filter pass needs both). */
static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev, uint32_t ps1,
                                 uint32_t ps0, uint32_t qs0, uint32_t qs1,
                                 uint32_t *p1_f0, uint32_t *p0_f0,
                                 uint32_t *q0_f0, uint32_t *q1_f0) {
  int32_t vpx_filter_l, vpx_filter_r;
  int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
  int32_t subr_r, subr_l;
  uint32_t t1, t2, HWM, t3;
  uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
  int32_t vps1, vps0, vqs0, vqs1;
  int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
  uint32_t N128;

  N128 = 0x80808080; /* 0x80 per byte: unsigned<->signed bias */
  t1 = 0x03000300;   /* +3 in the high byte of each halfword */
  t2 = 0x04000400;   /* +4 in the high byte of each halfword */
  t3 = 0x01000100;   /* +1 in the high byte of each halfword */
  HWM = 0xFF00FF00;  /* keeps only the high byte of each halfword */

  vps0 = (ps0) ^ N128;
  vps1 = (ps1) ^ N128;
  vqs0 = (qs0) ^ N128;
  vqs1 = (qs1) ^ N128;

  /* use halfword pairs instead of quad-bytes because of accuracy */
  vps0_l = vps0 & HWM;
  vps0_r = vps0 << 8;
  vps0_r = vps0_r & HWM;

  vps1_l = vps1 & HWM;
  vps1_r = vps1 << 8;
  vps1_r = vps1_r & HWM;

  vqs0_l = vqs0 & HWM;
  vqs0_r = vqs0 << 8;
  vqs0_r = vqs0_r & HWM;

  vqs1_l = vqs1 & HWM;
  vqs1_r = vqs1 << 8;
  vqs1_r = vqs1_r & HWM;

  mask_l = mask & HWM;
  mask_r = mask << 8;
  mask_r = mask_r & HWM;

  hev_l = hev & HWM;
  hev_r = hev << 8;
  hev_r = hev_r & HWM;

  __asm__ __volatile__(
      /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */
      "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t"
      "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t"

      /* qs0 - ps0 */
      "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
      "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"

      /* vpx_filter &= hev; */
      "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t"
      "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t"

      /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */
      /* the ~hev masks are computed in the same block to fill the pipeline */
      "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
      "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
      "xor %[invhev_l], %[hev_l], %[HWM] \n\t"
      "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
      "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"
      "xor %[invhev_r], %[hev_r], %[HWM] \n\t"
      "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t"
      "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t"

      /* vpx_filter &= mask; */
      "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t"
      "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t"

      : [vpx_filter_l] "=&r"(vpx_filter_l), [vpx_filter_r] "=&r"(vpx_filter_r),
        [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r),
        [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r)
      : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l),
        [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r),
        [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l),
        [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r),
        [HWM] "r"(HWM));

  /* save bottom 3 bits so that we round one side +4 and the other +3 */
  __asm__ __volatile__(
      /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >> 3; (t2 = +4) */
      "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t"
      "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t"

      /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >> 3; (t1 = +3) */
      "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t"
      "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t"
      "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
      "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"

      "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t"
      "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t"

      /* only Filter1 is masked here; the low bytes of the halfword results
         are discarded anyway when the quad-bytes are rebuilt below */
      "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t"
      "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t"

      /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */
      "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t"
      "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t"

      /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */
      "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t"
      "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t"

      : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r),
        [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r),
        [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l),
        [vqs0_r] "+r"(vqs0_r)
      : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM),
        [vpx_filter_l] "r"(vpx_filter_l), [vpx_filter_r] "r"(vpx_filter_r));

  __asm__ __volatile__(
      /* (vpx_filter += 1) >>= 1  (addqh.ph adds then halves) */
      "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
      "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"

      /* vpx_filter &= ~hev; */
      "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
      "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"

      /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */
      "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
      "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"

      /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */
      "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
      "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"

      : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r),
        [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l),
        [vqs1_r] "+r"(vqs1_r)
      : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r));

  /* Create quad-bytes from halfword pairs */
  vqs0_l = vqs0_l & HWM;
  vqs1_l = vqs1_l & HWM;
  vps0_l = vps0_l & HWM;
  vps1_l = vps1_l & HWM;

  __asm__ __volatile__(
      "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t"
      "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t"
      "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t"
      "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t"

      : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r),
        [vqs0_r] "+r"(vqs0_r)
      :);

  vqs0 = vqs0_l | vqs0_r;
  vqs1 = vqs1_l | vqs1_r;
  vps0 = vps0_l | vps0_r;
  vps1 = vps1_l | vps1_r;

  /* un-bias back to unsigned pixels */
  *p0_f0 = vps0 ^ N128;
  *p1_f0 = vps1 ^ N128;
  *q0_f0 = vqs0 ^ N128;
  *q1_f0 = vqs1 ^ N128;
}

/* 8-sample "mb" filter: replaces p2..q2 in place with the 7-tap averages
 * listed below.  Operates on two pixels at a time (halfword lanes of each
 * uint32_t); u32Four adds the +4 rounding term before each >> 3. */
static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2, uint32_t *op1,
                                  uint32_t *op0, uint32_t *oq0, uint32_t *oq1,
                                  uint32_t *oq2, uint32_t *oq3) {
  /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
  const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
  const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
  uint32_t res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2;
  uint32_t tmp;
  uint32_t add_p210_q012; /* p2+p1+p0+q0+q1+q2+4: common to all six taps */
  uint32_t u32Four = 0x00040004;

  /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3)  1 */
  /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3)  2 */
  /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3)  3 */
  /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3)  4 */
  /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3)  5 */
  /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3)  6 */

  __asm__ __volatile__(
      /* shared partial sum, then each tap adds/subtracts its deltas */
      "addu.ph %[add_p210_q012], %[p2], %[p1] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[p0] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[q0] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[q1] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[q2] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[u32Four] \n\t"

      "shll.ph %[tmp], %[p3], 1 \n\t"
      "addu.ph %[res_op2], %[tmp], %[p3] \n\t"
      "addu.ph %[res_op1], %[p3], %[p3] \n\t"
      "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
      "addu.ph %[res_op1], %[res_op1], %[p1] \n\t"
      "addu.ph %[res_op2], %[res_op2], %[add_p210_q012] \n\t"
      "addu.ph %[res_op1], %[res_op1], %[add_p210_q012] \n\t"
      "subu.ph %[res_op2], %[res_op2], %[q1] \n\t"
      "subu.ph %[res_op1], %[res_op1], %[q2] \n\t"
      "subu.ph %[res_op2], %[res_op2], %[q2] \n\t"
      "shrl.ph %[res_op1], %[res_op1], 3 \n\t"
      "shrl.ph %[res_op2], %[res_op2], 3 \n\t"
      "addu.ph %[res_op0], %[p3], %[p0] \n\t"
      "addu.ph %[res_oq0], %[q0], %[q3] \n\t"
      "addu.ph %[res_op0], %[res_op0], %[add_p210_q012] \n\t"
      "addu.ph %[res_oq0], %[res_oq0], %[add_p210_q012] \n\t"
      "addu.ph %[res_oq1], %[q3], %[q3] \n\t"
      "shll.ph %[tmp], %[q3], 1 \n\t"
      "addu.ph %[res_oq1], %[res_oq1], %[q1] \n\t"
      "addu.ph %[res_oq2], %[tmp], %[q3] \n\t"
      "addu.ph %[res_oq1], %[res_oq1], %[add_p210_q012] \n\t"
      "addu.ph %[res_oq2], %[res_oq2], %[add_p210_q012] \n\t"
      "subu.ph %[res_oq1], %[res_oq1], %[p2] \n\t"
      "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
      "shrl.ph %[res_oq1], %[res_oq1], 3 \n\t"
      "subu.ph %[res_oq2], %[res_oq2], %[p2] \n\t"
      "shrl.ph %[res_oq0], %[res_oq0], 3 \n\t"
      "subu.ph %[res_oq2], %[res_oq2], %[p1] \n\t"
      "shrl.ph %[res_op0], %[res_op0], 3 \n\t"
      "shrl.ph %[res_oq2], %[res_oq2], 3 \n\t"

      : [add_p210_q012] "=&r"(add_p210_q012), [tmp] "=&r"(tmp),
        [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
        [res_op0] "=&r"(res_op0), [res_oq0] "=&r"(res_oq0),
        [res_oq1] "=&r"(res_oq1), [res_oq2] "=&r"(res_oq2)
      : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [q1] "r"(q1), [p2] "r"(p2),
        [q2] "r"(q2), [p3] "r"(p3), [q3] "r"(q3), [u32Four] "r"(u32Four));

  *op2 = res_op2;
  *op1 = res_op1;
  *op0 = res_op0;
  *oq0 = res_oq0;
  *oq1 = res_oq1;
  *oq2 = res_oq2;
}

/* Same 7-tap filter as mbfilter_dspr2(), but inputs are passed by value and
 * the results go to the separate *_f1 outputs (originals are preserved). */
static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2, uint32_t p1,
                                   uint32_t p0, uint32_t q0, uint32_t q1,
                                   uint32_t q2, uint32_t q3, uint32_t *op2_f1,
                                   uint32_t *op1_f1, uint32_t *op0_f1,
                                   uint32_t *oq0_f1, uint32_t *oq1_f1,
                                   uint32_t *oq2_f1) {
  /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */
  uint32_t res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2;
  uint32_t tmp;
  uint32_t add_p210_q012; /* p2+p1+p0+q0+q1+q2+4: common to all six taps */
  uint32_t u32Four = 0x00040004;

  /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3)  1 */
  /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3)  2 */
  /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3)  3 */
  /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3)  4 */
  /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3)  5 */
  /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3)  6 */

  __asm__ __volatile__(
      /* shared partial sum, then each tap adds/subtracts its deltas */
      "addu.ph %[add_p210_q012], %[p2], %[p1] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[p0] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[q0] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[q1] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[q2] \n\t"
      "addu.ph %[add_p210_q012], %[add_p210_q012], %[u32Four] \n\t"

      "shll.ph %[tmp], %[p3], 1 \n\t"
      "addu.ph %[res_op2], %[tmp], %[p3] \n\t"
      "addu.ph %[res_op1], %[p3], %[p3] \n\t"
      "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
      "addu.ph %[res_op1], %[res_op1], %[p1] \n\t"
      "addu.ph %[res_op2], %[res_op2], %[add_p210_q012] \n\t"
      "addu.ph %[res_op1], %[res_op1], %[add_p210_q012] \n\t"
      "subu.ph %[res_op2], %[res_op2], %[q1] \n\t"
      "subu.ph %[res_op1], %[res_op1], %[q2] \n\t"
      "subu.ph %[res_op2], %[res_op2], %[q2] \n\t"
      "shrl.ph %[res_op1], %[res_op1], 3 \n\t"
      "shrl.ph %[res_op2], %[res_op2], 3 \n\t"
      "addu.ph %[res_op0], %[p3], %[p0] \n\t"
      "addu.ph %[res_oq0], %[q0], %[q3] \n\t"
      "addu.ph %[res_op0], %[res_op0], %[add_p210_q012] \n\t"
      "addu.ph %[res_oq0], %[res_oq0], %[add_p210_q012] \n\t"
      "addu.ph %[res_oq1], %[q3], %[q3] \n\t"
      "shll.ph %[tmp], %[q3], 1 \n\t"
      "addu.ph %[res_oq1], %[res_oq1], %[q1] \n\t"
      "addu.ph %[res_oq2], %[tmp], %[q3] \n\t"
      "addu.ph %[res_oq1], %[res_oq1], %[add_p210_q012] \n\t"
      "addu.ph %[res_oq2], %[res_oq2], %[add_p210_q012] \n\t"
      "subu.ph %[res_oq1], %[res_oq1], %[p2] \n\t"
      "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
      "shrl.ph %[res_oq1], %[res_oq1], 3 \n\t"
      "subu.ph %[res_oq2], %[res_oq2], %[p2] \n\t"
      "shrl.ph %[res_oq0], %[res_oq0], 3 \n\t"
      "subu.ph %[res_oq2], %[res_oq2], %[p1] \n\t"
      "shrl.ph %[res_op0], %[res_op0], 3 \n\t"
      "shrl.ph %[res_oq2], %[res_oq2], 3 \n\t"

      : [add_p210_q012] "=&r"(add_p210_q012), [tmp] "=&r"(tmp),
        [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
        [res_op0] "=&r"(res_op0), [res_oq0] "=&r"(res_oq0),
        [res_oq1] "=&r"(res_oq1), [res_oq2] "=&r"(res_oq2)
      : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [q1] "r"(q1), [p2] "r"(p2),
        [q2] "r"(q2), [p3] "r"(p3), [q3] "r"(q3), [u32Four] "r"(u32Four));

  *op2_f1 = res_op2;
  *op1_f1 = res_op1;
  *op0_f1 = res_op0;
  *oq0_f1 = res_oq0;
  *oq1_f1 = res_oq1;
  *oq2_f1 = res_oq2;
}

/* 16-sample wide filter: replaces p6..q6 in place with 15-tap averages
 * (rounded >> 4).  A single shared sum of p6..q6 plus the +8 rounding term
 * is built once; each output then adds its doubled center / scaled outer
 * samples and subtracts the terms not in its window. */
static INLINE void wide_mbfilter_dspr2(
    uint32_t *op7, uint32_t *op6, uint32_t *op5, uint32_t *op4, uint32_t *op3,
    uint32_t *op2, uint32_t *op1, uint32_t *op0, uint32_t *oq0, uint32_t *oq1,
    uint32_t *oq2, uint32_t *oq3, uint32_t *oq4, uint32_t *oq5, uint32_t *oq6,
    uint32_t *oq7) {
  const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
  const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
  const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
  const uint32_t q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7;
  uint32_t res_op6, res_op5, res_op4, res_op3, res_op2, res_op1, res_op0;
  uint32_t res_oq0, res_oq1, res_oq2, res_oq3, res_oq4, res_oq5, res_oq6;
  uint32_t tmp;
  uint32_t add_p6toq6;
  uint32_t u32Eight = 0x00080008; /* +8 rounding term for the >> 4 */

  __asm__ __volatile__(
      /* addition of p6,p5,p4,p3,p2,p1,p0,q0,q1,q2,q3,q4,q5,q6
         which is used most of the time */
      "addu.ph %[add_p6toq6], %[p6], %[p5] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[p4] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[p3] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[p2] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[p1] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[p0] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[q0] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[q1] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[q2] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[q3] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[q4] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[q5] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[q6] \n\t"
      "addu.ph %[add_p6toq6], %[add_p6toq6], %[u32Eight] \n\t"

      : [add_p6toq6] "=&r"(add_p6toq6)
      : [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2),
        [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2),
        [q3] "r"(q3), [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6),
        [u32Eight] "r"(u32Eight));

  __asm__ __volatile__(
      /* *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 +
                                   p3 + p2 + p1 + p0 + q0, 4) */
      "shll.ph %[tmp], %[p7], 3 \n\t"
      "subu.ph %[res_op6], %[tmp], %[p7] \n\t" /* p7 * 7 = (p7 << 3) - p7 */
      "addu.ph %[res_op6], %[res_op6], %[p6] \n\t"
      "addu.ph %[res_op6], %[res_op6], %[add_p6toq6] \n\t"
      "subu.ph %[res_op6], %[res_op6], %[q1] \n\t"
      "subu.ph %[res_op6], %[res_op6], %[q2] \n\t"
      "subu.ph %[res_op6], %[res_op6], %[q3] \n\t"
      "subu.ph %[res_op6], %[res_op6], %[q4] \n\t"
      "subu.ph %[res_op6], %[res_op6], %[q5] \n\t"
      "subu.ph %[res_op6], %[res_op6], %[q6] \n\t"
      "shrl.ph %[res_op6], %[res_op6], 4 \n\t"

      /* *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 +
                                   p2 + p1 + p0 + q0 + q1, 4) */
      "shll.ph %[tmp], %[p7], 2 \n\t"
      "addu.ph %[res_op5], %[tmp], %[p7] \n\t"
      "addu.ph %[res_op5], %[res_op5], %[p7] \n\t"
      "addu.ph %[res_op5], %[res_op5], %[p5] \n\t"
      "addu.ph %[res_op5], %[res_op5], %[add_p6toq6] \n\t"
      "subu.ph %[res_op5], %[res_op5], %[q2] \n\t"
      "subu.ph %[res_op5], %[res_op5], %[q3] \n\t"
      "subu.ph %[res_op5], %[res_op5], %[q4] \n\t"
      "subu.ph %[res_op5], %[res_op5], %[q5] \n\t"
      "subu.ph %[res_op5], %[res_op5], %[q6] \n\t"
      "shrl.ph %[res_op5], %[res_op5], 4 \n\t"

      /* *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 +
                                   p1 + p0 + q0 + q1 + q2, 4) */
      "shll.ph %[tmp], %[p7], 2 \n\t"
      "addu.ph %[res_op4], %[tmp], %[p7] \n\t"
      "addu.ph %[res_op4], %[res_op4], %[p4] \n\t"
      "addu.ph %[res_op4], %[res_op4], %[add_p6toq6] \n\t"
      "subu.ph %[res_op4], %[res_op4], %[q3] \n\t"
      "subu.ph %[res_op4], %[res_op4], %[q4] \n\t"
      "subu.ph %[res_op4], %[res_op4], %[q5] \n\t"
      "subu.ph %[res_op4], %[res_op4], %[q6] \n\t"
      "shrl.ph %[res_op4], %[res_op4], 4 \n\t"

      /* *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 +
                                   p1 + p0 + q0 + q1 + q2 + q3, 4) */
      "shll.ph %[tmp], %[p7], 2 \n\t"
      "addu.ph %[res_op3], %[tmp], %[p3] \n\t"
      "addu.ph %[res_op3], %[res_op3], %[add_p6toq6] \n\t"
      "subu.ph %[res_op3], %[res_op3], %[q4] \n\t"
      "subu.ph %[res_op3], %[res_op3], %[q5] \n\t"
      "subu.ph %[res_op3], %[res_op3], %[q6] \n\t"
      "shrl.ph %[res_op3], %[res_op3], 4 \n\t"

      /* *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 +
                                   p0 + q0 + q1 + q2 + q3 + q4, 4) */
      "shll.ph %[tmp], %[p7], 1 \n\t"
      "addu.ph %[res_op2], %[tmp], %[p7] \n\t"
      "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
      "addu.ph %[res_op2], %[res_op2], %[add_p6toq6] \n\t"
      "subu.ph %[res_op2], %[res_op2], %[q5] \n\t"
      "subu.ph %[res_op2], %[res_op2], %[q6] \n\t"
      "shrl.ph %[res_op2], %[res_op2], 4 \n\t"

      /* *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
                                   p0 + q0 + q1 + q2 + q3 + q4 + q5, 4); */
      "shll.ph %[tmp], %[p7], 1 \n\t"
      "addu.ph %[res_op1], %[tmp], %[p1] \n\t"
      "addu.ph %[res_op1], %[res_op1], %[add_p6toq6] \n\t"
      "subu.ph %[res_op1], %[res_op1], %[q6] \n\t"
      "shrl.ph %[res_op1], %[res_op1], 4 \n\t"

      /* *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
                                   q0 + q1 + q2 + q3 + q4 + q5 + q6, 4) */
      "addu.ph %[res_op0], %[p7], %[p0] \n\t"
      "addu.ph %[res_op0], %[res_op0], %[add_p6toq6] \n\t"
      "shrl.ph %[res_op0], %[res_op0], 4 \n\t"

      : [res_op6] "=&r"(res_op6), [res_op5] "=&r"(res_op5),
        [res_op4] "=&r"(res_op4), [res_op3] "=&r"(res_op3),
        [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
        [res_op0] "=&r"(res_op0), [tmp] "=&r"(tmp)
      : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3),
        [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q2] "r"(q2), [q1] "r"(q1),
        [q3] "r"(q3), [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6),
        [add_p6toq6] "r"(add_p6toq6));

  *op6 = res_op6;
  *op5 = res_op5;
  *op4 = res_op4;
  *op3 = res_op3;
  *op2 = res_op2;
  *op1 = res_op1;
  *op0 = res_op0;

  __asm__ __volatile__(
      /* *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
                                   q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); */
      "addu.ph %[res_oq0], %[q7], %[q0] \n\t"
      "addu.ph %[res_oq0], %[res_oq0], %[add_p6toq6] \n\t"
      "shrl.ph %[res_oq0], %[res_oq0], 4 \n\t"

      /* *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
                                   q2 + q3 + q4 + q5 + q6 + q7 * 2, 4) */
      "shll.ph %[tmp], %[q7], 1 \n\t"
      "addu.ph %[res_oq1], %[tmp], %[q1] \n\t"
      "addu.ph %[res_oq1], %[res_oq1], %[add_p6toq6] \n\t"
      "subu.ph %[res_oq1], %[res_oq1], %[p6] \n\t"
      "shrl.ph %[res_oq1], %[res_oq1], 4 \n\t"

      /* *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
                                   q3 + q4 + q5 + q6 + q7 * 3, 4) */
      "shll.ph %[tmp], %[q7], 1 \n\t"
      "addu.ph %[res_oq2], %[tmp], %[q7] \n\t"
      "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
      "addu.ph %[res_oq2], %[res_oq2], %[add_p6toq6] \n\t"
      "subu.ph %[res_oq2], %[res_oq2], %[p5] \n\t"
      "subu.ph %[res_oq2], %[res_oq2], %[p6] \n\t"
      "shrl.ph %[res_oq2], %[res_oq2], 4 \n\t"

      /* *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + q0 + q1 + q2 +
                                   q3 * 2 + q4 + q5 + q6 + q7 * 4, 4) */
      "shll.ph %[tmp], %[q7], 2 \n\t"
      "addu.ph %[res_oq3], %[tmp], %[q3] \n\t"
      "addu.ph %[res_oq3], %[res_oq3], %[add_p6toq6] \n\t"
      "subu.ph %[res_oq3], %[res_oq3], %[p4] \n\t"
      "subu.ph %[res_oq3], %[res_oq3], %[p5] \n\t"
      "subu.ph %[res_oq3], %[res_oq3], %[p6] \n\t"
      "shrl.ph %[res_oq3], %[res_oq3], 4 \n\t"

      /* *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q1 + q2 + q3 +
                                   q4 * 2 + q5 + q6 + q7 * 5, 4) */
      "shll.ph %[tmp], %[q7], 2 \n\t"
      "addu.ph %[res_oq4], %[tmp], %[q7] \n\t"
      "addu.ph %[res_oq4], %[res_oq4], %[q4] \n\t"
      "addu.ph %[res_oq4], %[res_oq4], %[add_p6toq6] \n\t"
      "subu.ph %[res_oq4], %[res_oq4], %[p3] \n\t"
      "subu.ph %[res_oq4], %[res_oq4], %[p4] \n\t"
      "subu.ph %[res_oq4], %[res_oq4], %[p5] \n\t"
      "subu.ph %[res_oq4], %[res_oq4], %[p6] \n\t"
      "shrl.ph %[res_oq4], %[res_oq4], 4 \n\t"

      /* *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q2 + q3 + q4 +
                                   q5 * 2 + q6 + q7 * 6, 4) */
      "shll.ph %[tmp], %[q7], 2 \n\t"
      "addu.ph %[res_oq5], %[tmp], %[q7] \n\t"
      "addu.ph %[res_oq5], %[res_oq5], %[q7] \n\t"
      "addu.ph %[res_oq5], %[res_oq5], %[q5] \n\t"
      "addu.ph %[res_oq5], %[res_oq5], %[add_p6toq6] \n\t"
      "subu.ph %[res_oq5], %[res_oq5], %[p2] \n\t"
      "subu.ph %[res_oq5], %[res_oq5], %[p3] \n\t"
      "subu.ph %[res_oq5], %[res_oq5], %[p4] \n\t"
      "subu.ph %[res_oq5], %[res_oq5], %[p5] \n\t"
      "subu.ph %[res_oq5], %[res_oq5], %[p6] \n\t"
      "shrl.ph %[res_oq5], %[res_oq5], 4 \n\t"

      /* *oq6 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 +
                                   q4 + q5 + q6 * 2 + q7 * 7, 4) */
      "shll.ph %[tmp], %[q7], 3 \n\t"
      "subu.ph %[res_oq6], %[tmp], %[q7] \n\t" /* q7 * 7 = (q7 << 3) - q7 */
      "addu.ph %[res_oq6], %[res_oq6], %[q6] \n\t"
      "addu.ph %[res_oq6], %[res_oq6], %[add_p6toq6] \n\t"
      "subu.ph %[res_oq6], %[res_oq6], %[p1] \n\t"
      "subu.ph %[res_oq6], %[res_oq6], %[p2] \n\t"
      "subu.ph %[res_oq6], %[res_oq6], %[p3] \n\t"
      "subu.ph %[res_oq6], %[res_oq6], %[p4] \n\t"
      "subu.ph %[res_oq6], %[res_oq6], %[p5] \n\t"
      "subu.ph %[res_oq6], %[res_oq6], %[p6] \n\t"
      "shrl.ph %[res_oq6], %[res_oq6], 4 \n\t"

      : [res_oq6] "=&r"(res_oq6), [res_oq5] "=&r"(res_oq5),
        [res_oq4] "=&r"(res_oq4), [res_oq3] "=&r"(res_oq3),
        [res_oq2] "=&r"(res_oq2), [res_oq1] "=&r"(res_oq1),
        [res_oq0] "=&r"(res_oq0), [tmp] "=&r"(tmp)
      : [q7] "r"(q7), [q6] "r"(q6), [q5] "r"(q5), [q4] "r"(q4), [q3] "r"(q3),
        [q2] "r"(q2), [q1] "r"(q1), [q0] "r"(q0), [p1] "r"(p1), [p2] "r"(p2),
        [p3] "r"(p3), [p4] "r"(p4), [p5] "r"(p5), [p6] "r"(p6),
        [add_p6toq6] "r"(add_p6toq6));

  *oq0 = res_oq0;
  *oq1 = res_oq1;
  *oq2 = res_oq2;
  *oq3 = res_oq3;
  *oq4 = res_oq4;
  *oq5 = res_oq5;
  *oq6 = res_oq6;
}
#endif  // #if HAVE_DSPR2
#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_FILTERS_DSPR2_H_