1/* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 12#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 13 14#include <stdlib.h> 15 16#include "./vp9_rtcd.h" 17#include "vp9/common/vp9_common.h" 18#include "vp9/common/vp9_onyxc_int.h" 19 20#ifdef __cplusplus 21extern "C" { 22#endif 23 24#if HAVE_DSPR2 25/* processing 4 pixels at the same time 26 * compute hev and mask in the same function */ 27static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit, 28 uint32_t p1, uint32_t p0, 29 uint32_t p3, uint32_t p2, 30 uint32_t q0, uint32_t q1, 31 uint32_t q2, uint32_t q3, 32 uint32_t thresh, uint32_t *hev, 33 uint32_t *mask) { 34 uint32_t c, r, r3, r_k; 35 uint32_t s1, s2, s3; 36 uint32_t ones = 0xFFFFFFFF; 37 uint32_t hev1; 38 39 __asm__ __volatile__ ( 40 /* mask |= (abs(p3 - p2) > limit) */ 41 "subu_s.qb %[c], %[p3], %[p2] \n\t" 42 "subu_s.qb %[r_k], %[p2], %[p3] \n\t" 43 "or %[r_k], %[r_k], %[c] \n\t" 44 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 45 "or %[r], $0, %[c] \n\t" 46 47 /* mask |= (abs(p2 - p1) > limit) */ 48 "subu_s.qb %[c], %[p2], %[p1] \n\t" 49 "subu_s.qb %[r_k], %[p1], %[p2] \n\t" 50 "or %[r_k], %[r_k], %[c] \n\t" 51 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 52 "or %[r], %[r], %[c] \n\t" 53 54 /* mask |= (abs(p1 - p0) > limit) 55 * hev |= (abs(p1 - p0) > thresh) 56 */ 57 "subu_s.qb %[c], %[p1], %[p0] \n\t" 58 "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 59 "or %[r_k], %[r_k], %[c] \n\t" 60 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 61 "or %[r3], $0, %[c] \n\t" 62 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 63 "or %[r], %[r], %[c] \n\t" 64 65 /* mask |= (abs(q1 - q0) > limit) 66 * hev |= (abs(q1 - q0) > thresh) 67 */ 68 "subu_s.qb %[c], %[q1], %[q0] \n\t" 69 "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 70 "or %[r_k], %[r_k], %[c] \n\t" 71 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 72 "or %[r3], %[r3], %[c] \n\t" 73 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 74 "or %[r], %[r], %[c] \n\t" 75 76 /* mask |= (abs(q2 - q1) > limit) */ 77 "subu_s.qb %[c], %[q2], %[q1] \n\t" 78 "subu_s.qb %[r_k], %[q1], %[q2] \n\t" 79 "or %[r_k], %[r_k], %[c] \n\t" 80 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 81 "or %[r], %[r], %[c] \n\t" 82 "sll %[r3], %[r3], 24 \n\t" 83 84 /* mask |= (abs(q3 - q2) > limit) */ 85 "subu_s.qb %[c], %[q3], %[q2] \n\t" 86 "subu_s.qb %[r_k], %[q2], %[q3] \n\t" 87 "or %[r_k], %[r_k], %[c] \n\t" 88 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 89 "or %[r], %[r], %[c] \n\t" 90 91 : [c] "=&r" (c), [r_k] "=&r" (r_k), 92 [r] "=&r" (r), [r3] "=&r" (r3) 93 : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), 94 [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), 95 [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) 96 ); 97 98 __asm__ __volatile__ ( 99 /* abs(p0 - q0) */ 100 "subu_s.qb %[c], %[p0], %[q0] \n\t" 101 "subu_s.qb %[r_k], %[q0], %[p0] \n\t" 102 "wrdsp %[r3] \n\t" 103 "or %[s1], %[r_k], %[c] \n\t" 104 105 /* abs(p1 - q1) */ 106 "subu_s.qb %[c], %[p1], %[q1] \n\t" 107 "addu_s.qb %[s3], %[s1], %[s1] \n\t" 108 "pick.qb %[hev1], %[ones], $0 \n\t" 109 "subu_s.qb %[r_k], %[q1], %[p1] \n\t" 110 "or %[s2], %[r_k], %[c] \n\t" 111 112 /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ 113 "shrl.qb %[s2], %[s2], 1 \n\t" 114 "addu_s.qb %[s1], %[s2], %[s3] \n\t" 115 "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" 116 "or %[r], %[r], %[c] \n\t" 117 "sll %[r], %[r], 24 \n\t" 118 119 "wrdsp %[r] \n\t" 120 "pick.qb %[s2], $0, %[ones] \n\t" 121 122 : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), 123 [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) 124 : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), 125 [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) 126 ); 127 128 *hev = hev1; 129 *mask = s2; 130} 131 132static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit, 133 uint32_t flimit, 134 uint32_t thresh, 135 uint32_t p1, uint32_t p0, 136 uint32_t p3, uint32_t p2, 137 uint32_t q0, uint32_t q1, 138 uint32_t q2, uint32_t q3, 139 uint32_t *hev, 140 uint32_t *mask, 141 uint32_t *flat) { 142 uint32_t c, r, r3, r_k, r_flat; 143 uint32_t s1, s2, s3; 144 uint32_t ones = 0xFFFFFFFF; 145 uint32_t flat_thresh = 0x01010101; 146 uint32_t hev1; 147 uint32_t flat1; 148 149 __asm__ __volatile__ ( 150 /* mask |= (abs(p3 - p2) > limit) */ 151 "subu_s.qb %[c], %[p3], %[p2] \n\t" 152 "subu_s.qb %[r_k], %[p2], %[p3] \n\t" 153 "or %[r_k], %[r_k], %[c] \n\t" 154 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 155 "or %[r], $0, %[c] \n\t" 156 157 /* mask |= (abs(p2 - p1) > limit) */ 158 "subu_s.qb %[c], %[p2], %[p1] \n\t" 159 "subu_s.qb %[r_k], %[p1], %[p2] \n\t" 160 "or %[r_k], %[r_k], %[c] \n\t" 161 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 162 "or %[r], %[r], %[c] \n\t" 163 164 /* mask |= (abs(p1 - p0) > limit) 165 * hev |= (abs(p1 - p0) > thresh) 166 * flat |= (abs(p1 - p0) > thresh) 167 */ 168 "subu_s.qb %[c], %[p1], %[p0] \n\t" 169 "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 170 "or %[r_k], %[r_k], %[c] \n\t" 171 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 172 "or %[r3], $0, %[c] \n\t" 173 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 174 "or %[r], %[r], %[c] \n\t" 175 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 176 "or %[r_flat], $0, %[c] \n\t" 177 178 /* mask |= (abs(q1 - q0) > limit) 179 * hev |= (abs(q1 - q0) > thresh) 180 * flat |= (abs(q1 - q0) > thresh) 181 */ 182 "subu_s.qb %[c], %[q1], %[q0] \n\t" 183 "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 184 "or %[r_k], %[r_k], %[c] \n\t" 185 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 186 "or %[r3], %[r3], %[c] \n\t" 187 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 188 "or %[r], %[r], %[c] \n\t" 189 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 190 "or %[r_flat], %[r_flat], %[c] \n\t" 191 192 /* flat |= (abs(p0 - p2) > thresh) */ 193 "subu_s.qb %[c], %[p0], %[p2] \n\t" 194 "subu_s.qb %[r_k], %[p2], %[p0] \n\t" 195 "or %[r_k], %[r_k], %[c] \n\t" 196 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 197 "or %[r_flat], %[r_flat], %[c] \n\t" 198 199 /* flat |= (abs(q0 - q2) > thresh) */ 200 "subu_s.qb %[c], %[q0], %[q2] \n\t" 201 "subu_s.qb %[r_k], %[q2], %[q0] \n\t" 202 "or %[r_k], %[r_k], %[c] \n\t" 203 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 204 "or %[r_flat], %[r_flat], %[c] \n\t" 205 206 /* flat |= (abs(p3 - p0) > thresh) */ 207 "subu_s.qb %[c], %[p3], %[p0] \n\t" 208 "subu_s.qb %[r_k], %[p0], %[p3] \n\t" 209 "or %[r_k], %[r_k], %[c] \n\t" 210 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 211 "or %[r_flat], %[r_flat], %[c] \n\t" 212 213 /* flat |= (abs(q3 - q0) > thresh) */ 214 "subu_s.qb %[c], %[q3], %[q0] \n\t" 215 "subu_s.qb %[r_k], %[q0], %[q3] \n\t" 216 "or %[r_k], %[r_k], %[c] \n\t" 217 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 218 "or %[r_flat], %[r_flat], %[c] \n\t" 219 "sll %[r_flat], %[r_flat], 24 \n\t" 220 /* look at stall here */ 221 "wrdsp %[r_flat] \n\t" 222 "pick.qb %[flat1], $0, %[ones] \n\t" 223 224 /* mask |= (abs(q2 - q1) > limit) */ 225 "subu_s.qb %[c], %[q2], %[q1] \n\t" 226 "subu_s.qb %[r_k], %[q1], %[q2] \n\t" 227 "or %[r_k], %[r_k], %[c] \n\t" 228 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 229 "or %[r], %[r], %[c] \n\t" 230 "sll %[r3], %[r3], 24 \n\t" 231 232 /* mask |= (abs(q3 - q2) > limit) */ 233 "subu_s.qb %[c], %[q3], %[q2] \n\t" 234 "subu_s.qb %[r_k], %[q2], %[q3] \n\t" 235 "or %[r_k], %[r_k], %[c] \n\t" 236 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 237 "or %[r], %[r], %[c] \n\t" 238 239 : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), [r3] "=&r" (r3), 240 [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1) 241 : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), 242 [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), 243 [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh), 244 [flat_thresh] "r" (flat_thresh), [ones] "r" (ones) 245 ); 246 247 __asm__ __volatile__ ( 248 /* abs(p0 - q0) */ 249 "subu_s.qb %[c], %[p0], %[q0] \n\t" 250 "subu_s.qb %[r_k], %[q0], %[p0] \n\t" 251 "wrdsp %[r3] \n\t" 252 "or %[s1], %[r_k], %[c] \n\t" 253 254 /* abs(p1 - q1) */ 255 "subu_s.qb %[c], %[p1], %[q1] \n\t" 256 "addu_s.qb %[s3], %[s1], %[s1] \n\t" 257 "pick.qb %[hev1], %[ones], $0 \n\t" 258 "subu_s.qb %[r_k], %[q1], %[p1] \n\t" 259 "or %[s2], %[r_k], %[c] \n\t" 260 261 /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ 262 "shrl.qb %[s2], %[s2], 1 \n\t" 263 "addu_s.qb %[s1], %[s2], %[s3] \n\t" 264 "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" 265 "or %[r], %[r], %[c] \n\t" 266 "sll %[r], %[r], 24 \n\t" 267 268 "wrdsp %[r] \n\t" 269 "pick.qb %[s2], $0, %[ones] \n\t" 270 271 : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), 272 [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) 273 : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), 274 [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) 275 ); 276 277 *hev = hev1; 278 *mask = s2; 279 *flat = flat1; 280} 281 282static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3, 283 uint32_t p2, uint32_t p1, 284 uint32_t p0, uint32_t q0, 285 uint32_t q1, uint32_t q2, 286 uint32_t q3, uint32_t q4, 287 uint32_t *flat2) { 288 uint32_t c, r, r_k, r_flat; 289 uint32_t ones = 0xFFFFFFFF; 290 uint32_t flat_thresh = 0x01010101; 291 uint32_t flat1, flat3; 292 293 __asm__ __volatile__ ( 294 /* flat |= (abs(p4 - p0) > thresh) */ 295 "subu_s.qb %[c], %[p4], %[p0] \n\t" 296 "subu_s.qb %[r_k], %[p0], %[p4] \n\t" 297 "or %[r_k], %[r_k], %[c] \n\t" 298 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 299 "or %[r], $0, %[c] \n\t" 300 301 /* flat |= (abs(q4 - q0) > thresh) */ 302 "subu_s.qb %[c], %[q4], %[q0] \n\t" 303 "subu_s.qb %[r_k], %[q0], %[q4] \n\t" 304 "or %[r_k], %[r_k], %[c] \n\t" 305 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 306 "or %[r], %[r], %[c] \n\t" 307 "sll %[r], %[r], 24 \n\t" 308 "wrdsp %[r] \n\t" 309 "pick.qb %[flat3], $0, %[ones] \n\t" 310 311 /* flat |= (abs(p1 - p0) > thresh) */ 312 "subu_s.qb %[c], %[p1], %[p0] \n\t" 313 "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 314 "or %[r_k], %[r_k], %[c] \n\t" 315 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 316 "or %[r_flat], $0, %[c] \n\t" 317 318 /* flat |= (abs(q1 - q0) > thresh) */ 319 "subu_s.qb %[c], %[q1], %[q0] \n\t" 320 "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 321 "or %[r_k], %[r_k], %[c] \n\t" 322 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 323 "or %[r_flat], %[r_flat], %[c] \n\t" 324 325 /* flat |= (abs(p0 - p2) > thresh) */ 326 "subu_s.qb %[c], %[p0], %[p2] \n\t" 327 "subu_s.qb %[r_k], %[p2], %[p0] \n\t" 328 "or %[r_k], %[r_k], %[c] \n\t" 329 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 330 "or %[r_flat], %[r_flat], %[c] \n\t" 331 332 /* flat |= (abs(q0 - q2) > thresh) */ 333 "subu_s.qb %[c], %[q0], %[q2] \n\t" 334 "subu_s.qb %[r_k], %[q2], %[q0] \n\t" 335 "or %[r_k], %[r_k], %[c] \n\t" 336 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 337 "or %[r_flat], %[r_flat], %[c] \n\t" 338 339 /* flat |= (abs(p3 - p0) > thresh) */ 340 "subu_s.qb %[c], %[p3], %[p0] \n\t" 341 "subu_s.qb %[r_k], %[p0], %[p3] \n\t" 342 "or %[r_k], %[r_k], %[c] \n\t" 343 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 344 "or %[r_flat], %[r_flat], %[c] \n\t" 345 346 /* flat |= (abs(q3 - q0) > thresh) */ 347 "subu_s.qb %[c], %[q3], %[q0] \n\t" 348 "subu_s.qb %[r_k], %[q0], %[q3] \n\t" 349 "or %[r_k], %[r_k], %[c] \n\t" 350 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 351 "or %[r_flat], %[r_flat], %[c] \n\t" 352 "sll %[r_flat], %[r_flat], 24 \n\t" 353 "wrdsp %[r_flat] \n\t" 354 "pick.qb %[flat1], $0, %[ones] \n\t" 355 /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */ 356 "and %[flat1], %[flat3], %[flat1] \n\t" 357 358 : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), 359 [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1), [flat3] "=&r" (flat3) 360 : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), 361 [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1), 362 [q2] "r" (q2), [q3] "r" (q3), [q4] "r" (q4), 363 [flat_thresh] "r" (flat_thresh), [ones] "r" (ones) 364 ); 365 366 *flat2 = flat1; 367} 368#endif // #if HAVE_DSPR2 369#ifdef __cplusplus 370} // extern "C" 371#endif 372 373#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 374