/*
 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_

#include <stdlib.h>

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_onyxc_int.h"

#ifdef __cplusplus
extern "C" {
#endif

#if HAVE_DSPR2
/* processing 4 pixels at the same time
 * compute hev and mask in the same function */
27233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit, 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t p1, uint32_t p0, 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t p3, uint32_t p2, 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t q0, uint32_t q1, 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t q2, uint32_t q3, 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t thresh, uint32_t *hev, 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t *mask) { 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t c, r, r3, r_k; 35233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t s1, s2, s3; 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t ones = 0xFFFFFFFF; 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t hev1; 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(p3 - p2) > limit) */ 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p3], %[p2] \n\t" 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p2], %[p3] \n\t" 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 45233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], $0, %[c] \n\t" 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(p2 - p1) > limit) */ 48233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p2], %[p1] \n\t" 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p1], %[p2] \n\t" 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 
53233d2500723e5594f3e7c70896ffeeef32b9c950ywan 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(p1 - p0) > limit) 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan * hev |= (abs(p1 - p0) > thresh) 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 57233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p1], %[p0] \n\t" 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r3], $0, %[c] \n\t" 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(q1 - q0) > limit) 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan * hev |= (abs(q1 - q0) > thresh) 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q1], %[q0] \n\t" 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r3], %[r3], %[c] \n\t" 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(q2 - q1) > limit) */ 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q2], %[q1] \n\t" 78233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q1], %[q2] \n\t" 
79233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sll %[r3], %[r3], 24 \n\t" 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(q3 - q2) > limit) */ 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q3], %[q2] \n\t" 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q2], %[q3] \n\t" 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [c] "=&r" (c), [r_k] "=&r" (r_k), 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan [r] "=&r" (r), [r3] "=&r" (r3) 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan 98233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* abs(p0 - q0) */ 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p0], %[q0] \n\t" 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q0], %[p0] \n\t" 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan "wrdsp %[r3] \n\t" 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[s1], %[r_k], %[c] \n\t" 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan 
105233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* abs(p1 - q1) */ 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p1], %[q1] \n\t" 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addu_s.qb %[s3], %[s1], %[s1] \n\t" 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan "pick.qb %[hev1], %[ones], $0 \n\t" 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q1], %[p1] \n\t" 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[s2], %[r_k], %[c] \n\t" 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan "shrl.qb %[s2], %[s2], 1 \n\t" 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addu_s.qb %[s1], %[s2], %[s3] \n\t" 115233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sll %[r], %[r], 24 \n\t" 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan "wrdsp %[r] \n\t" 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan "pick.qb %[s2], $0, %[ones] \n\t" 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), 123233d2500723e5594f3e7c70896ffeeef32b9c950ywan [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan 128233d2500723e5594f3e7c70896ffeeef32b9c950ywan *hev = hev1; 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan *mask = s2; 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 
131233d2500723e5594f3e7c70896ffeeef32b9c950ywan 132233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit, 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t flimit, 134233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t thresh, 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t p1, uint32_t p0, 136233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t p3, uint32_t p2, 137233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t q0, uint32_t q1, 138233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t q2, uint32_t q3, 139233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t *hev, 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t *mask, 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t *flat) { 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t c, r, r3, r_k, r_flat; 143233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t s1, s2, s3; 144233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t ones = 0xFFFFFFFF; 145233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t flat_thresh = 0x01010101; 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t hev1; 147233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t flat1; 148233d2500723e5594f3e7c70896ffeeef32b9c950ywan 149233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(p3 - p2) > limit) */ 151233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p3], %[p2] \n\t" 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p2], %[p3] \n\t" 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 154233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 155233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], $0, %[c] \n\t" 156233d2500723e5594f3e7c70896ffeeef32b9c950ywan 157233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(p2 - p1) > limit) */ 
158233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p2], %[p1] \n\t" 159233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p1], %[p2] \n\t" 160233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 161233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 162233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 163233d2500723e5594f3e7c70896ffeeef32b9c950ywan 164233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(p1 - p0) > limit) 165233d2500723e5594f3e7c70896ffeeef32b9c950ywan * hev |= (abs(p1 - p0) > thresh) 166233d2500723e5594f3e7c70896ffeeef32b9c950ywan * flat |= (abs(p1 - p0) > thresh) 167233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 168233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p1], %[p0] \n\t" 169233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 170233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 171233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 172233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r3], $0, %[c] \n\t" 173233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 174233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 175233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 176233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], $0, %[c] \n\t" 177233d2500723e5594f3e7c70896ffeeef32b9c950ywan 178233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(q1 - q0) > limit) 179233d2500723e5594f3e7c70896ffeeef32b9c950ywan * hev |= (abs(q1 - q0) > thresh) 180233d2500723e5594f3e7c70896ffeeef32b9c950ywan * flat |= (abs(q1 - q0) > thresh) 181233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 182233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q1], %[q0] \n\t" 183233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb 
%[r_k], %[q0], %[q1] \n\t" 184233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 185233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 186233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r3], %[r3], %[c] \n\t" 187233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 188233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 189233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 190233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 191233d2500723e5594f3e7c70896ffeeef32b9c950ywan 192233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(p0 - p2) > thresh) */ 193233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p0], %[p2] \n\t" 194233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p2], %[p0] \n\t" 195233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 196233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 197233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 198233d2500723e5594f3e7c70896ffeeef32b9c950ywan 199233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(q0 - q2) > thresh) */ 200233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q0], %[q2] \n\t" 201233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q2], %[q0] \n\t" 202233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 203233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 204233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 205233d2500723e5594f3e7c70896ffeeef32b9c950ywan 206233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(p3 - p0) > thresh) */ 207233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p3], %[p0] \n\t" 
208233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p0], %[p3] \n\t" 209233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 210233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 211233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 212233d2500723e5594f3e7c70896ffeeef32b9c950ywan 213233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(q3 - q0) > thresh) */ 214233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q3], %[q0] \n\t" 215233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q0], %[q3] \n\t" 216233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 217233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 218233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 219233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sll %[r_flat], %[r_flat], 24 \n\t" 220233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* look at stall here */ 221233d2500723e5594f3e7c70896ffeeef32b9c950ywan "wrdsp %[r_flat] \n\t" 222233d2500723e5594f3e7c70896ffeeef32b9c950ywan "pick.qb %[flat1], $0, %[ones] \n\t" 223233d2500723e5594f3e7c70896ffeeef32b9c950ywan 224233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(q2 - q1) > limit) */ 225233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q2], %[q1] \n\t" 226233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q1], %[q2] \n\t" 227233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 228233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 229233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 230233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sll %[r3], %[r3], 24 \n\t" 231233d2500723e5594f3e7c70896ffeeef32b9c950ywan 232233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* mask |= (abs(q3 - q2) > limit) */ 
233233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q3], %[q2] \n\t" 234233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q2], %[q3] \n\t" 235233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 236233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 237233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 238233d2500723e5594f3e7c70896ffeeef32b9c950ywan 239233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), [r3] "=&r" (r3), 240233d2500723e5594f3e7c70896ffeeef32b9c950ywan [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1) 241233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), 242233d2500723e5594f3e7c70896ffeeef32b9c950ywan [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), 243233d2500723e5594f3e7c70896ffeeef32b9c950ywan [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh), 244233d2500723e5594f3e7c70896ffeeef32b9c950ywan [flat_thresh] "r" (flat_thresh), [ones] "r" (ones) 245233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 246233d2500723e5594f3e7c70896ffeeef32b9c950ywan 247233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 248233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* abs(p0 - q0) */ 249233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p0], %[q0] \n\t" 250233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q0], %[p0] \n\t" 251233d2500723e5594f3e7c70896ffeeef32b9c950ywan "wrdsp %[r3] \n\t" 252233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[s1], %[r_k], %[c] \n\t" 253233d2500723e5594f3e7c70896ffeeef32b9c950ywan 254233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* abs(p1 - q1) */ 255233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p1], %[q1] \n\t" 256233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addu_s.qb %[s3], %[s1], %[s1] \n\t" 257233d2500723e5594f3e7c70896ffeeef32b9c950ywan "pick.qb %[hev1], %[ones], $0 
\n\t" 258233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q1], %[p1] \n\t" 259233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[s2], %[r_k], %[c] \n\t" 260233d2500723e5594f3e7c70896ffeeef32b9c950ywan 261233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ 262233d2500723e5594f3e7c70896ffeeef32b9c950ywan "shrl.qb %[s2], %[s2], 1 \n\t" 263233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addu_s.qb %[s1], %[s2], %[s3] \n\t" 264233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" 265233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 266233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sll %[r], %[r], 24 \n\t" 267233d2500723e5594f3e7c70896ffeeef32b9c950ywan 268233d2500723e5594f3e7c70896ffeeef32b9c950ywan "wrdsp %[r] \n\t" 269233d2500723e5594f3e7c70896ffeeef32b9c950ywan "pick.qb %[s2], $0, %[ones] \n\t" 270233d2500723e5594f3e7c70896ffeeef32b9c950ywan 271233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), 272233d2500723e5594f3e7c70896ffeeef32b9c950ywan [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) 273233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), 274233d2500723e5594f3e7c70896ffeeef32b9c950ywan [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) 275233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 276233d2500723e5594f3e7c70896ffeeef32b9c950ywan 277233d2500723e5594f3e7c70896ffeeef32b9c950ywan *hev = hev1; 278233d2500723e5594f3e7c70896ffeeef32b9c950ywan *mask = s2; 279233d2500723e5594f3e7c70896ffeeef32b9c950ywan *flat = flat1; 280233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 281233d2500723e5594f3e7c70896ffeeef32b9c950ywan 282233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3, 283233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t p2, uint32_t p1, 
284233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t p0, uint32_t q0, 285233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t q1, uint32_t q2, 286233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t q3, uint32_t q4, 287233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t *flat2) { 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t c, r, r_k, r_flat; 289233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t ones = 0xFFFFFFFF; 290233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t flat_thresh = 0x01010101; 291233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t flat1, flat3; 292233d2500723e5594f3e7c70896ffeeef32b9c950ywan 293233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 294233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(p4 - p0) > thresh) */ 295233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p4], %[p0] \n\t" 296233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p0], %[p4] \n\t" 297233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 298233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 299233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], $0, %[c] \n\t" 300233d2500723e5594f3e7c70896ffeeef32b9c950ywan 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(q4 - q0) > thresh) */ 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q4], %[q0] \n\t" 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q0], %[q4] \n\t" 304233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r], %[r], %[c] \n\t" 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sll %[r], %[r], 24 \n\t" 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan "wrdsp %[r] \n\t" 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan "pick.qb %[flat3], $0, %[ones] \n\t" 
310233d2500723e5594f3e7c70896ffeeef32b9c950ywan 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(p1 - p0) > thresh) */ 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p1], %[p0] \n\t" 313233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 314233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], $0, %[c] \n\t" 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(q1 - q0) > thresh) */ 319233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q1], %[q0] \n\t" 320233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 322233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 323233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 324233d2500723e5594f3e7c70896ffeeef32b9c950ywan 325233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(p0 - p2) > thresh) */ 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p0], %[p2] \n\t" 327233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p2], %[p0] \n\t" 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(q0 - q2) > thresh) */ 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q0], %[q2] \n\t" 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q2], %[q0] \n\t" 
335233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 337233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 338233d2500723e5594f3e7c70896ffeeef32b9c950ywan 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(p3 - p0) > thresh) */ 340233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[p3], %[p0] \n\t" 341233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[p0], %[p3] \n\t" 342233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 343233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 344233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 345233d2500723e5594f3e7c70896ffeeef32b9c950ywan 346233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat |= (abs(q3 - q0) > thresh) */ 347233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[c], %[q3], %[q0] \n\t" 348233d2500723e5594f3e7c70896ffeeef32b9c950ywan "subu_s.qb %[r_k], %[q0], %[q3] \n\t" 349233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_k], %[r_k], %[c] \n\t" 350233d2500723e5594f3e7c70896ffeeef32b9c950ywan "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 351233d2500723e5594f3e7c70896ffeeef32b9c950ywan "or %[r_flat], %[r_flat], %[c] \n\t" 352233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sll %[r_flat], %[r_flat], 24 \n\t" 353233d2500723e5594f3e7c70896ffeeef32b9c950ywan "wrdsp %[r_flat] \n\t" 354233d2500723e5594f3e7c70896ffeeef32b9c950ywan "pick.qb %[flat1], $0, %[ones] \n\t" 355233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */ 356233d2500723e5594f3e7c70896ffeeef32b9c950ywan "and %[flat1], %[flat3], %[flat1] \n\t" 357233d2500723e5594f3e7c70896ffeeef32b9c950ywan 358233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), 
359233d2500723e5594f3e7c70896ffeeef32b9c950ywan [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1), [flat3] "=&r" (flat3) 360233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), 361233d2500723e5594f3e7c70896ffeeef32b9c950ywan [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1), 362233d2500723e5594f3e7c70896ffeeef32b9c950ywan [q2] "r" (q2), [q3] "r" (q3), [q4] "r" (q4), 363233d2500723e5594f3e7c70896ffeeef32b9c950ywan [flat_thresh] "r" (flat_thresh), [ones] "r" (ones) 364233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 365233d2500723e5594f3e7c70896ffeeef32b9c950ywan 366233d2500723e5594f3e7c70896ffeeef32b9c950ywan *flat2 = flat1; 367233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 368233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif // #if HAVE_DSPR2 369233d2500723e5594f3e7c70896ffeeef32b9c950ywan#ifdef __cplusplus 370233d2500723e5594f3e7c70896ffeeef32b9c950ywan} // extern "C" 371233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif 372233d2500723e5594f3e7c70896ffeeef32b9c950ywan 373233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 374