1/* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 12#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 13 14#include <stdlib.h> 15 16#include "./vp9_rtcd.h" 17#include "vp9/common/vp9_common.h" 18#include "vp9/common/vp9_onyxc_int.h" 19 20#if HAVE_DSPR2 21/* processing 4 pixels at the same time 22 * compute hev and mask in the same function */ 23static INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit, 24 uint32_t p1, uint32_t p0, 25 uint32_t p3, uint32_t p2, 26 uint32_t q0, uint32_t q1, 27 uint32_t q2, uint32_t q3, 28 uint32_t thresh, uint32_t *hev, 29 uint32_t *mask) { 30 uint32_t c, r, r3, r_k; 31 uint32_t s1, s2, s3; 32 uint32_t ones = 0xFFFFFFFF; 33 uint32_t hev1; 34 35 __asm__ __volatile__ ( 36 /* mask |= (abs(p3 - p2) > limit) */ 37 "subu_s.qb %[c], %[p3], %[p2] \n\t" 38 "subu_s.qb %[r_k], %[p2], %[p3] \n\t" 39 "or %[r_k], %[r_k], %[c] \n\t" 40 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 41 "or %[r], $0, %[c] \n\t" 42 43 /* mask |= (abs(p2 - p1) > limit) */ 44 "subu_s.qb %[c], %[p2], %[p1] \n\t" 45 "subu_s.qb %[r_k], %[p1], %[p2] \n\t" 46 "or %[r_k], %[r_k], %[c] \n\t" 47 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 48 "or %[r], %[r], %[c] \n\t" 49 50 /* mask |= (abs(p1 - p0) > limit) 51 * hev |= (abs(p1 - p0) > thresh) 52 */ 53 "subu_s.qb %[c], %[p1], %[p0] \n\t" 54 "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 55 "or %[r_k], %[r_k], %[c] \n\t" 56 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 57 "or %[r3], $0, %[c] \n\t" 58 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 59 "or %[r], %[r], %[c] \n\t" 60 61 /* mask |= (abs(q1 - q0) > limit) 62 * hev |= (abs(q1 - q0) > thresh) 63 */ 64 "subu_s.qb %[c], %[q1], %[q0] \n\t" 65 "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 66 "or %[r_k], %[r_k], %[c] \n\t" 67 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 68 "or %[r3], %[r3], %[c] \n\t" 69 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 70 "or %[r], %[r], %[c] \n\t" 71 72 /* mask |= (abs(q2 - q1) > limit) */ 73 "subu_s.qb %[c], %[q2], %[q1] \n\t" 74 "subu_s.qb %[r_k], %[q1], %[q2] \n\t" 75 "or %[r_k], %[r_k], %[c] \n\t" 76 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 77 "or %[r], %[r], %[c] \n\t" 78 "sll %[r3], %[r3], 24 \n\t" 79 80 /* mask |= (abs(q3 - q2) > limit) */ 81 "subu_s.qb %[c], %[q3], %[q2] \n\t" 82 "subu_s.qb %[r_k], %[q2], %[q3] \n\t" 83 "or %[r_k], %[r_k], %[c] \n\t" 84 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 85 "or %[r], %[r], %[c] \n\t" 86 87 : [c] "=&r" (c), [r_k] "=&r" (r_k), 88 [r] "=&r" (r), [r3] "=&r" (r3) 89 : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), 90 [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), 91 [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) 92 ); 93 94 __asm__ __volatile__ ( 95 /* abs(p0 - q0) */ 96 "subu_s.qb %[c], %[p0], %[q0] \n\t" 97 "subu_s.qb %[r_k], %[q0], %[p0] \n\t" 98 "wrdsp %[r3] \n\t" 99 "or %[s1], %[r_k], %[c] \n\t" 100 101 /* abs(p1 - q1) */ 102 "subu_s.qb %[c], %[p1], %[q1] \n\t" 103 "addu_s.qb %[s3], %[s1], %[s1] \n\t" 104 "pick.qb %[hev1], %[ones], $0 \n\t" 105 "subu_s.qb %[r_k], %[q1], %[p1] \n\t" 106 "or %[s2], %[r_k], %[c] \n\t" 107 108 /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ 109 "shrl.qb %[s2], %[s2], 1 \n\t" 110 "addu_s.qb %[s1], %[s2], %[s3] \n\t" 111 "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" 112 "or %[r], %[r], %[c] \n\t" 113 "sll %[r], %[r], 24 \n\t" 114 115 "wrdsp %[r] \n\t" 116 "pick.qb %[s2], $0, %[ones] \n\t" 117 118 : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), 119 [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) 120 : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), 121 [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) 122 ); 123 124 *hev = hev1; 125 *mask = s2; 126} 127 128static INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit, 129 uint32_t flimit, 130 uint32_t thresh, 131 uint32_t p1, uint32_t p0, 132 uint32_t p3, uint32_t p2, 133 uint32_t q0, uint32_t q1, 134 uint32_t q2, uint32_t q3, 135 uint32_t *hev, 136 uint32_t *mask, 137 uint32_t *flat) { 138 uint32_t c, r, r3, r_k, r_flat; 139 uint32_t s1, s2, s3; 140 uint32_t ones = 0xFFFFFFFF; 141 uint32_t flat_thresh = 0x01010101; 142 uint32_t hev1; 143 uint32_t flat1; 144 145 __asm__ __volatile__ ( 146 /* mask |= (abs(p3 - p2) > limit) */ 147 "subu_s.qb %[c], %[p3], %[p2] \n\t" 148 "subu_s.qb %[r_k], %[p2], %[p3] \n\t" 149 "or %[r_k], %[r_k], %[c] \n\t" 150 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 151 "or %[r], $0, %[c] \n\t" 152 153 /* mask |= (abs(p2 - p1) > limit) */ 154 "subu_s.qb %[c], %[p2], %[p1] \n\t" 155 "subu_s.qb %[r_k], %[p1], %[p2] \n\t" 156 "or %[r_k], %[r_k], %[c] \n\t" 157 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 158 "or %[r], %[r], %[c] \n\t" 159 160 /* mask |= (abs(p1 - p0) > limit) 161 * hev |= (abs(p1 - p0) > thresh) 162 * flat |= (abs(p1 - p0) > thresh) 163 */ 164 "subu_s.qb %[c], %[p1], %[p0] \n\t" 165 "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 166 "or %[r_k], %[r_k], %[c] \n\t" 167 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 168 "or %[r3], $0, %[c] \n\t" 169 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 170 "or %[r], %[r], %[c] \n\t" 171 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 172 "or %[r_flat], $0, %[c] \n\t" 173 174 /* mask |= (abs(q1 - q0) > limit) 175 * hev |= (abs(q1 - q0) > thresh) 176 * flat |= (abs(q1 - q0) > thresh) 177 */ 178 "subu_s.qb %[c], %[q1], %[q0] \n\t" 179 "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 180 "or %[r_k], %[r_k], %[c] \n\t" 181 "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 182 "or %[r3], %[r3], %[c] \n\t" 183 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 184 "or %[r], %[r], %[c] \n\t" 185 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 186 "or %[r_flat], %[r_flat], %[c] \n\t" 187 188 /* flat |= (abs(p0 - p2) > thresh) */ 189 "subu_s.qb %[c], %[p0], %[p2] \n\t" 190 "subu_s.qb %[r_k], %[p2], %[p0] \n\t" 191 "or %[r_k], %[r_k], %[c] \n\t" 192 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 193 "or %[r_flat], %[r_flat], %[c] \n\t" 194 195 /* flat |= (abs(q0 - q2) > thresh) */ 196 "subu_s.qb %[c], %[q0], %[q2] \n\t" 197 "subu_s.qb %[r_k], %[q2], %[q0] \n\t" 198 "or %[r_k], %[r_k], %[c] \n\t" 199 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 200 "or %[r_flat], %[r_flat], %[c] \n\t" 201 202 /* flat |= (abs(p3 - p0) > thresh) */ 203 "subu_s.qb %[c], %[p3], %[p0] \n\t" 204 "subu_s.qb %[r_k], %[p0], %[p3] \n\t" 205 "or %[r_k], %[r_k], %[c] \n\t" 206 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 207 "or %[r_flat], %[r_flat], %[c] \n\t" 208 209 /* flat |= (abs(q3 - q0) > thresh) */ 210 "subu_s.qb %[c], %[q3], %[q0] \n\t" 211 "subu_s.qb %[r_k], %[q0], %[q3] \n\t" 212 "or %[r_k], %[r_k], %[c] \n\t" 213 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 214 "or %[r_flat], %[r_flat], %[c] \n\t" 215 "sll %[r_flat], %[r_flat], 24 \n\t" 216 /* look at stall here */ 217 "wrdsp %[r_flat] \n\t" 218 "pick.qb %[flat1], $0, %[ones] \n\t" 219 220 /* mask |= (abs(q2 - q1) > limit) */ 221 "subu_s.qb %[c], %[q2], %[q1] \n\t" 222 "subu_s.qb %[r_k], %[q1], %[q2] \n\t" 223 "or %[r_k], %[r_k], %[c] \n\t" 224 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 225 "or %[r], %[r], %[c] \n\t" 226 "sll %[r3], %[r3], 24 \n\t" 227 228 /* mask |= (abs(q3 - q2) > limit) */ 229 "subu_s.qb %[c], %[q3], %[q2] \n\t" 230 "subu_s.qb %[r_k], %[q2], %[q3] \n\t" 231 "or %[r_k], %[r_k], %[c] \n\t" 232 "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 233 "or %[r], %[r], %[c] \n\t" 234 235 : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), [r3] "=&r" (r3), 236 [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1) 237 : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), 238 [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), 239 [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh), 240 [flat_thresh] "r" (flat_thresh), [ones] "r" (ones) 241 ); 242 243 __asm__ __volatile__ ( 244 /* abs(p0 - q0) */ 245 "subu_s.qb %[c], %[p0], %[q0] \n\t" 246 "subu_s.qb %[r_k], %[q0], %[p0] \n\t" 247 "wrdsp %[r3] \n\t" 248 "or %[s1], %[r_k], %[c] \n\t" 249 250 /* abs(p1 - q1) */ 251 "subu_s.qb %[c], %[p1], %[q1] \n\t" 252 "addu_s.qb %[s3], %[s1], %[s1] \n\t" 253 "pick.qb %[hev1], %[ones], $0 \n\t" 254 "subu_s.qb %[r_k], %[q1], %[p1] \n\t" 255 "or %[s2], %[r_k], %[c] \n\t" 256 257 /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ 258 "shrl.qb %[s2], %[s2], 1 \n\t" 259 "addu_s.qb %[s1], %[s2], %[s3] \n\t" 260 "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" 261 "or %[r], %[r], %[c] \n\t" 262 "sll %[r], %[r], 24 \n\t" 263 264 "wrdsp %[r] \n\t" 265 "pick.qb %[s2], $0, %[ones] \n\t" 266 267 : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), 268 [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) 269 : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), 270 [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) 271 ); 272 273 *hev = hev1; 274 *mask = s2; 275 *flat = flat1; 276} 277 278static INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3, 279 uint32_t p2, uint32_t p1, 280 uint32_t p0, uint32_t q0, 281 uint32_t q1, uint32_t q2, 282 uint32_t q3, uint32_t q4, 283 uint32_t *flat2) { 284 uint32_t c, r, r_k, r_flat; 285 uint32_t ones = 0xFFFFFFFF; 286 uint32_t flat_thresh = 0x01010101; 287 uint32_t flat1, flat3; 288 289 __asm__ __volatile__ ( 290 /* flat |= (abs(p4 - p0) > thresh) */ 291 "subu_s.qb %[c], %[p4], %[p0] \n\t" 292 "subu_s.qb %[r_k], %[p0], %[p4] \n\t" 293 "or %[r_k], %[r_k], %[c] \n\t" 294 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 295 "or %[r], $0, %[c] \n\t" 296 297 /* flat |= (abs(q4 - q0) > thresh) */ 298 "subu_s.qb %[c], %[q4], %[q0] \n\t" 299 "subu_s.qb %[r_k], %[q0], %[q4] \n\t" 300 "or %[r_k], %[r_k], %[c] \n\t" 301 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 302 "or %[r], %[r], %[c] \n\t" 303 "sll %[r], %[r], 24 \n\t" 304 "wrdsp %[r] \n\t" 305 "pick.qb %[flat3], $0, %[ones] \n\t" 306 307 /* flat |= (abs(p1 - p0) > thresh) */ 308 "subu_s.qb %[c], %[p1], %[p0] \n\t" 309 "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 310 "or %[r_k], %[r_k], %[c] \n\t" 311 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 312 "or %[r_flat], $0, %[c] \n\t" 313 314 /* flat |= (abs(q1 - q0) > thresh) */ 315 "subu_s.qb %[c], %[q1], %[q0] \n\t" 316 "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 317 "or %[r_k], %[r_k], %[c] \n\t" 318 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 319 "or %[r_flat], %[r_flat], %[c] \n\t" 320 321 /* flat |= (abs(p0 - p2) > thresh) */ 322 "subu_s.qb %[c], %[p0], %[p2] \n\t" 323 "subu_s.qb %[r_k], %[p2], %[p0] \n\t" 324 "or %[r_k], %[r_k], %[c] \n\t" 325 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 326 "or %[r_flat], %[r_flat], %[c] \n\t" 327 328 /* flat |= (abs(q0 - q2) > thresh) */ 329 "subu_s.qb %[c], %[q0], %[q2] \n\t" 330 "subu_s.qb %[r_k], %[q2], %[q0] \n\t" 331 "or %[r_k], %[r_k], %[c] \n\t" 332 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 333 "or %[r_flat], %[r_flat], %[c] \n\t" 334 335 /* flat |= (abs(p3 - p0) > thresh) */ 336 "subu_s.qb %[c], %[p3], %[p0] \n\t" 337 "subu_s.qb %[r_k], %[p0], %[p3] \n\t" 338 "or %[r_k], %[r_k], %[c] \n\t" 339 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 340 "or %[r_flat], %[r_flat], %[c] \n\t" 341 342 /* flat |= (abs(q3 - q0) > thresh) */ 343 "subu_s.qb %[c], %[q3], %[q0] \n\t" 344 "subu_s.qb %[r_k], %[q0], %[q3] \n\t" 345 "or %[r_k], %[r_k], %[c] \n\t" 346 "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 347 "or %[r_flat], %[r_flat], %[c] \n\t" 348 "sll %[r_flat], %[r_flat], 24 \n\t" 349 "wrdsp %[r_flat] \n\t" 350 "pick.qb %[flat1], $0, %[ones] \n\t" 351 /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */ 352 "and %[flat1], %[flat3], %[flat1] \n\t" 353 354 : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), 355 [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1), [flat3] "=&r" (flat3) 356 : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), 357 [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1), 358 [q2] "r" (q2), [q3] "r" (q3), [q4] "r" (q4), 359 [flat_thresh] "r" (flat_thresh), [ones] "r" (ones) 360 ); 361 362 *flat2 = flat1; 363} 364#endif // #if HAVE_DSPR2 365#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 366