19b35249446b07f40ac5fcc3205f2c048616efacchkuang/* 29b35249446b07f40ac5fcc3205f2c048616efacchkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 39b35249446b07f40ac5fcc3205f2c048616efacchkuang * 49b35249446b07f40ac5fcc3205f2c048616efacchkuang * Use of this source code is governed by a BSD-style license 59b35249446b07f40ac5fcc3205f2c048616efacchkuang * that can be found in the LICENSE file in the root of the source 69b35249446b07f40ac5fcc3205f2c048616efacchkuang * tree. An additional intellectual property rights grant can be found 79b35249446b07f40ac5fcc3205f2c048616efacchkuang * in the file PATENTS. All contributing project authors may 89b35249446b07f40ac5fcc3205f2c048616efacchkuang * be found in the AUTHORS file in the root of the source tree. 99b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 109b35249446b07f40ac5fcc3205f2c048616efacchkuang 119b35249446b07f40ac5fcc3205f2c048616efacchkuang#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 129b35249446b07f40ac5fcc3205f2c048616efacchkuang#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 139b35249446b07f40ac5fcc3205f2c048616efacchkuang 149b35249446b07f40ac5fcc3205f2c048616efacchkuang#include <stdlib.h> 159b35249446b07f40ac5fcc3205f2c048616efacchkuang 169b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "./vp9_rtcd.h" 179b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_common.h" 189b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_onyxc_int.h" 199b35249446b07f40ac5fcc3205f2c048616efacchkuang 20b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian#ifdef __cplusplus 21b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianextern "C" { 22b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian#endif 23b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 249b35249446b07f40ac5fcc3205f2c048616efacchkuang#if HAVE_DSPR2 259b35249446b07f40ac5fcc3205f2c048616efacchkuang/* processing 4 pixels at the same time 269b35249446b07f40ac5fcc3205f2c048616efacchkuang * compute hev and mask in the same function */ 279b35249446b07f40ac5fcc3205f2c048616efacchkuangstatic INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit, 289b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t p1, uint32_t p0, 299b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t p3, uint32_t p2, 309b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t q0, uint32_t q1, 319b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t q2, uint32_t q3, 329b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t thresh, uint32_t *hev, 339b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t *mask) { 349b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t c, r, r3, r_k; 359b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t s1, s2, s3; 369b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t ones = 0xFFFFFFFF; 379b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t hev1; 389b35249446b07f40ac5fcc3205f2c048616efacchkuang 399b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 409b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(p3 - p2) > limit) */ 419b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p3], %[p2] \n\t" 429b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p2], %[p3] \n\t" 439b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 449b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 459b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], $0, %[c] \n\t" 469b35249446b07f40ac5fcc3205f2c048616efacchkuang 479b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(p2 - p1) > limit) */ 489b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p2], %[p1] \n\t" 499b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p1], %[p2] \n\t" 509b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 519b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 529b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 539b35249446b07f40ac5fcc3205f2c048616efacchkuang 549b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(p1 - p0) > limit) 559b35249446b07f40ac5fcc3205f2c048616efacchkuang * hev |= (abs(p1 - p0) > thresh) 569b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 579b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p1], %[p0] \n\t" 589b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 599b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 609b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 619b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r3], $0, %[c] \n\t" 629b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 639b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 649b35249446b07f40ac5fcc3205f2c048616efacchkuang 659b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(q1 - q0) > limit) 669b35249446b07f40ac5fcc3205f2c048616efacchkuang * hev |= (abs(q1 - q0) > thresh) 679b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 689b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q1], %[q0] \n\t" 699b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 709b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 719b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 729b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r3], %[r3], %[c] \n\t" 739b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 749b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 759b35249446b07f40ac5fcc3205f2c048616efacchkuang 769b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(q2 - q1) > limit) */ 779b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q2], %[q1] \n\t" 789b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q1], %[q2] \n\t" 799b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 809b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 819b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 829b35249446b07f40ac5fcc3205f2c048616efacchkuang "sll %[r3], %[r3], 24 \n\t" 839b35249446b07f40ac5fcc3205f2c048616efacchkuang 849b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(q3 - q2) > limit) */ 859b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q3], %[q2] \n\t" 869b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q2], %[q3] \n\t" 879b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 889b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 899b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 909b35249446b07f40ac5fcc3205f2c048616efacchkuang 919b35249446b07f40ac5fcc3205f2c048616efacchkuang : [c] "=&r" (c), [r_k] "=&r" (r_k), 929b35249446b07f40ac5fcc3205f2c048616efacchkuang [r] "=&r" (r), [r3] "=&r" (r3) 939b35249446b07f40ac5fcc3205f2c048616efacchkuang : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), 949b35249446b07f40ac5fcc3205f2c048616efacchkuang [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), 959b35249446b07f40ac5fcc3205f2c048616efacchkuang [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh) 969b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 979b35249446b07f40ac5fcc3205f2c048616efacchkuang 989b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 999b35249446b07f40ac5fcc3205f2c048616efacchkuang /* abs(p0 - q0) */ 1009b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p0], %[q0] \n\t" 1019b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q0], %[p0] \n\t" 1029b35249446b07f40ac5fcc3205f2c048616efacchkuang "wrdsp %[r3] \n\t" 1039b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[s1], %[r_k], %[c] \n\t" 1049b35249446b07f40ac5fcc3205f2c048616efacchkuang 1059b35249446b07f40ac5fcc3205f2c048616efacchkuang /* abs(p1 - q1) */ 1069b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p1], %[q1] \n\t" 1079b35249446b07f40ac5fcc3205f2c048616efacchkuang "addu_s.qb %[s3], %[s1], %[s1] \n\t" 1089b35249446b07f40ac5fcc3205f2c048616efacchkuang "pick.qb %[hev1], %[ones], $0 \n\t" 1099b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q1], %[p1] \n\t" 1109b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[s2], %[r_k], %[c] \n\t" 1119b35249446b07f40ac5fcc3205f2c048616efacchkuang 1129b35249446b07f40ac5fcc3205f2c048616efacchkuang /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ 1139b35249446b07f40ac5fcc3205f2c048616efacchkuang "shrl.qb %[s2], %[s2], 1 \n\t" 1149b35249446b07f40ac5fcc3205f2c048616efacchkuang "addu_s.qb %[s1], %[s2], %[s3] \n\t" 1159b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" 1169b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 1179b35249446b07f40ac5fcc3205f2c048616efacchkuang "sll %[r], %[r], 24 \n\t" 1189b35249446b07f40ac5fcc3205f2c048616efacchkuang 1199b35249446b07f40ac5fcc3205f2c048616efacchkuang "wrdsp %[r] \n\t" 1209b35249446b07f40ac5fcc3205f2c048616efacchkuang "pick.qb %[s2], $0, %[ones] \n\t" 1219b35249446b07f40ac5fcc3205f2c048616efacchkuang 1229b35249446b07f40ac5fcc3205f2c048616efacchkuang : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), 1239b35249446b07f40ac5fcc3205f2c048616efacchkuang [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) 1249b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), 1259b35249446b07f40ac5fcc3205f2c048616efacchkuang [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) 1269b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 1279b35249446b07f40ac5fcc3205f2c048616efacchkuang 1289b35249446b07f40ac5fcc3205f2c048616efacchkuang *hev = hev1; 1299b35249446b07f40ac5fcc3205f2c048616efacchkuang *mask = s2; 1309b35249446b07f40ac5fcc3205f2c048616efacchkuang} 1319b35249446b07f40ac5fcc3205f2c048616efacchkuang 1329b35249446b07f40ac5fcc3205f2c048616efacchkuangstatic INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit, 1339b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t flimit, 1349b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t thresh, 1359b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t p1, uint32_t p0, 1369b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t p3, uint32_t p2, 1379b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t q0, uint32_t q1, 1389b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t q2, uint32_t q3, 1399b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t *hev, 1409b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t *mask, 1419b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t *flat) { 1429b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t c, r, r3, r_k, r_flat; 1439b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t s1, s2, s3; 1449b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t ones = 0xFFFFFFFF; 1459b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t flat_thresh = 0x01010101; 1469b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t hev1; 1479b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t flat1; 1489b35249446b07f40ac5fcc3205f2c048616efacchkuang 1499b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 1509b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(p3 - p2) > limit) */ 1519b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p3], %[p2] \n\t" 1529b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p2], %[p3] \n\t" 1539b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 1549b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 1559b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], $0, %[c] \n\t" 1569b35249446b07f40ac5fcc3205f2c048616efacchkuang 1579b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(p2 - p1) > limit) */ 1589b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p2], %[p1] \n\t" 1599b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p1], %[p2] \n\t" 1609b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 1619b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 1629b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 1639b35249446b07f40ac5fcc3205f2c048616efacchkuang 1649b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(p1 - p0) > limit) 1659b35249446b07f40ac5fcc3205f2c048616efacchkuang * hev |= (abs(p1 - p0) > thresh) 1669b35249446b07f40ac5fcc3205f2c048616efacchkuang * flat |= (abs(p1 - p0) > thresh) 1679b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 1689b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p1], %[p0] \n\t" 1699b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 1709b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 1719b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 1729b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r3], $0, %[c] \n\t" 1739b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 1749b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 1759b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 1769b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], $0, %[c] \n\t" 1779b35249446b07f40ac5fcc3205f2c048616efacchkuang 1789b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(q1 - q0) > limit) 1799b35249446b07f40ac5fcc3205f2c048616efacchkuang * hev |= (abs(q1 - q0) > thresh) 1809b35249446b07f40ac5fcc3205f2c048616efacchkuang * flat |= (abs(q1 - q0) > thresh) 1819b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 1829b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q1], %[q0] \n\t" 1839b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 1849b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 1859b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" 1869b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r3], %[r3], %[c] \n\t" 1879b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 1889b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 1899b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 1909b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 1919b35249446b07f40ac5fcc3205f2c048616efacchkuang 1929b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(p0 - p2) > thresh) */ 1939b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p0], %[p2] \n\t" 1949b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p2], %[p0] \n\t" 1959b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 1969b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 1979b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 1989b35249446b07f40ac5fcc3205f2c048616efacchkuang 1999b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(q0 - q2) > thresh) */ 2009b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q0], %[q2] \n\t" 2019b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q2], %[q0] \n\t" 2029b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 2039b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 2049b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 2059b35249446b07f40ac5fcc3205f2c048616efacchkuang 2069b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(p3 - p0) > thresh) */ 2079b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p3], %[p0] \n\t" 2089b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p0], %[p3] \n\t" 2099b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 2109b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 2119b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 2129b35249446b07f40ac5fcc3205f2c048616efacchkuang 2139b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(q3 - q0) > thresh) */ 2149b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q3], %[q0] \n\t" 2159b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q0], %[q3] \n\t" 2169b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 2179b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 2189b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 2199b35249446b07f40ac5fcc3205f2c048616efacchkuang "sll %[r_flat], %[r_flat], 24 \n\t" 2209b35249446b07f40ac5fcc3205f2c048616efacchkuang /* look at stall here */ 2219b35249446b07f40ac5fcc3205f2c048616efacchkuang "wrdsp %[r_flat] \n\t" 2229b35249446b07f40ac5fcc3205f2c048616efacchkuang "pick.qb %[flat1], $0, %[ones] \n\t" 2239b35249446b07f40ac5fcc3205f2c048616efacchkuang 2249b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(q2 - q1) > limit) */ 2259b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q2], %[q1] \n\t" 2269b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q1], %[q2] \n\t" 2279b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 2289b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 2299b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 2309b35249446b07f40ac5fcc3205f2c048616efacchkuang "sll %[r3], %[r3], 24 \n\t" 2319b35249446b07f40ac5fcc3205f2c048616efacchkuang 2329b35249446b07f40ac5fcc3205f2c048616efacchkuang /* mask |= (abs(q3 - q2) > limit) */ 2339b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q3], %[q2] \n\t" 2349b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q2], %[q3] \n\t" 2359b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 2369b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" 2379b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 2389b35249446b07f40ac5fcc3205f2c048616efacchkuang 2399b35249446b07f40ac5fcc3205f2c048616efacchkuang : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), [r3] "=&r" (r3), 2409b35249446b07f40ac5fcc3205f2c048616efacchkuang [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1) 2419b35249446b07f40ac5fcc3205f2c048616efacchkuang : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2), 2429b35249446b07f40ac5fcc3205f2c048616efacchkuang [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0), 2439b35249446b07f40ac5fcc3205f2c048616efacchkuang [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh), 2449b35249446b07f40ac5fcc3205f2c048616efacchkuang [flat_thresh] "r" (flat_thresh), [ones] "r" (ones) 2459b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2469b35249446b07f40ac5fcc3205f2c048616efacchkuang 2479b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2489b35249446b07f40ac5fcc3205f2c048616efacchkuang /* abs(p0 - q0) */ 2499b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p0], %[q0] \n\t" 2509b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q0], %[p0] \n\t" 2519b35249446b07f40ac5fcc3205f2c048616efacchkuang "wrdsp %[r3] \n\t" 2529b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[s1], %[r_k], %[c] \n\t" 2539b35249446b07f40ac5fcc3205f2c048616efacchkuang 2549b35249446b07f40ac5fcc3205f2c048616efacchkuang /* abs(p1 - q1) */ 2559b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p1], %[q1] \n\t" 2569b35249446b07f40ac5fcc3205f2c048616efacchkuang "addu_s.qb %[s3], %[s1], %[s1] \n\t" 2579b35249446b07f40ac5fcc3205f2c048616efacchkuang "pick.qb %[hev1], %[ones], $0 \n\t" 2589b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q1], %[p1] \n\t" 2599b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[s2], %[r_k], %[c] \n\t" 2609b35249446b07f40ac5fcc3205f2c048616efacchkuang 2619b35249446b07f40ac5fcc3205f2c048616efacchkuang /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ 2629b35249446b07f40ac5fcc3205f2c048616efacchkuang "shrl.qb %[s2], %[s2], 1 \n\t" 2639b35249446b07f40ac5fcc3205f2c048616efacchkuang "addu_s.qb %[s1], %[s2], %[s3] \n\t" 2649b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" 2659b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 2669b35249446b07f40ac5fcc3205f2c048616efacchkuang "sll %[r], %[r], 24 \n\t" 2679b35249446b07f40ac5fcc3205f2c048616efacchkuang 2689b35249446b07f40ac5fcc3205f2c048616efacchkuang "wrdsp %[r] \n\t" 2699b35249446b07f40ac5fcc3205f2c048616efacchkuang "pick.qb %[s2], $0, %[ones] \n\t" 2709b35249446b07f40ac5fcc3205f2c048616efacchkuang 2719b35249446b07f40ac5fcc3205f2c048616efacchkuang : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1), 2729b35249446b07f40ac5fcc3205f2c048616efacchkuang [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3) 2739b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3), 2749b35249446b07f40ac5fcc3205f2c048616efacchkuang [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit) 2759b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2769b35249446b07f40ac5fcc3205f2c048616efacchkuang 2779b35249446b07f40ac5fcc3205f2c048616efacchkuang *hev = hev1; 2789b35249446b07f40ac5fcc3205f2c048616efacchkuang *mask = s2; 2799b35249446b07f40ac5fcc3205f2c048616efacchkuang *flat = flat1; 2809b35249446b07f40ac5fcc3205f2c048616efacchkuang} 2819b35249446b07f40ac5fcc3205f2c048616efacchkuang 2829b35249446b07f40ac5fcc3205f2c048616efacchkuangstatic INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3, 2839b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t p2, uint32_t p1, 2849b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t p0, uint32_t q0, 2859b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t q1, uint32_t q2, 2869b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t q3, uint32_t q4, 2879b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t *flat2) { 2889b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t c, r, r_k, r_flat; 2899b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t ones = 0xFFFFFFFF; 2909b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t flat_thresh = 0x01010101; 2919b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t flat1, flat3; 2929b35249446b07f40ac5fcc3205f2c048616efacchkuang 2939b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2949b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(p4 - p0) > thresh) */ 2959b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p4], %[p0] \n\t" 2969b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p0], %[p4] \n\t" 2979b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 2989b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 2999b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], $0, %[c] \n\t" 3009b35249446b07f40ac5fcc3205f2c048616efacchkuang 3019b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(q4 - q0) > thresh) */ 3029b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q4], %[q0] \n\t" 3039b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q0], %[q4] \n\t" 3049b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 3059b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 3069b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r], %[r], %[c] \n\t" 3079b35249446b07f40ac5fcc3205f2c048616efacchkuang "sll %[r], %[r], 24 \n\t" 3089b35249446b07f40ac5fcc3205f2c048616efacchkuang "wrdsp %[r] \n\t" 3099b35249446b07f40ac5fcc3205f2c048616efacchkuang "pick.qb %[flat3], $0, %[ones] \n\t" 3109b35249446b07f40ac5fcc3205f2c048616efacchkuang 3119b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(p1 - p0) > thresh) */ 3129b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p1], %[p0] \n\t" 3139b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p0], %[p1] \n\t" 3149b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 3159b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 3169b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], $0, %[c] \n\t" 3179b35249446b07f40ac5fcc3205f2c048616efacchkuang 3189b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(q1 - q0) > thresh) */ 3199b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q1], %[q0] \n\t" 3209b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q0], %[q1] \n\t" 3219b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 3229b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 3239b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 3249b35249446b07f40ac5fcc3205f2c048616efacchkuang 3259b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(p0 - p2) > thresh) */ 3269b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p0], %[p2] \n\t" 3279b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p2], %[p0] \n\t" 3289b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 3299b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 3309b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 3319b35249446b07f40ac5fcc3205f2c048616efacchkuang 3329b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(q0 - q2) > thresh) */ 3339b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q0], %[q2] \n\t" 3349b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q2], %[q0] \n\t" 3359b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 3369b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 3379b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 3389b35249446b07f40ac5fcc3205f2c048616efacchkuang 3399b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(p3 - p0) > thresh) */ 3409b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[p3], %[p0] \n\t" 3419b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[p0], %[p3] \n\t" 3429b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 3439b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 3449b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 3459b35249446b07f40ac5fcc3205f2c048616efacchkuang 3469b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat |= (abs(q3 - q0) > thresh) */ 3479b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[c], %[q3], %[q0] \n\t" 3489b35249446b07f40ac5fcc3205f2c048616efacchkuang "subu_s.qb %[r_k], %[q0], %[q3] \n\t" 3499b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_k], %[r_k], %[c] \n\t" 3509b35249446b07f40ac5fcc3205f2c048616efacchkuang "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" 3519b35249446b07f40ac5fcc3205f2c048616efacchkuang "or %[r_flat], %[r_flat], %[c] \n\t" 3529b35249446b07f40ac5fcc3205f2c048616efacchkuang "sll %[r_flat], %[r_flat], 24 \n\t" 3539b35249446b07f40ac5fcc3205f2c048616efacchkuang "wrdsp %[r_flat] \n\t" 3549b35249446b07f40ac5fcc3205f2c048616efacchkuang "pick.qb %[flat1], $0, %[ones] \n\t" 3559b35249446b07f40ac5fcc3205f2c048616efacchkuang /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */ 3569b35249446b07f40ac5fcc3205f2c048616efacchkuang "and %[flat1], %[flat3], %[flat1] \n\t" 3579b35249446b07f40ac5fcc3205f2c048616efacchkuang 3589b35249446b07f40ac5fcc3205f2c048616efacchkuang : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), 3599b35249446b07f40ac5fcc3205f2c048616efacchkuang [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1), [flat3] "=&r" (flat3) 3609b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), 3619b35249446b07f40ac5fcc3205f2c048616efacchkuang [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1), 3629b35249446b07f40ac5fcc3205f2c048616efacchkuang [q2] "r" (q2), [q3] "r" (q3), [q4] "r" (q4), 3639b35249446b07f40ac5fcc3205f2c048616efacchkuang [flat_thresh] "r" (flat_thresh), [ones] "r" (ones) 3649b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 3659b35249446b07f40ac5fcc3205f2c048616efacchkuang 3669b35249446b07f40ac5fcc3205f2c048616efacchkuang *flat2 = flat1; 3679b35249446b07f40ac5fcc3205f2c048616efacchkuang} 3689b35249446b07f40ac5fcc3205f2c048616efacchkuang#endif // #if HAVE_DSPR2 369b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian#ifdef __cplusplus 370b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian} // extern "C" 371b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian#endif 372b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3739b35249446b07f40ac5fcc3205f2c048616efacchkuang#endif // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_ 374