19b35249446b07f40ac5fcc3205f2c048616efacchkuang/*
29b35249446b07f40ac5fcc3205f2c048616efacchkuang *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
39b35249446b07f40ac5fcc3205f2c048616efacchkuang *
49b35249446b07f40ac5fcc3205f2c048616efacchkuang *  Use of this source code is governed by a BSD-style license
59b35249446b07f40ac5fcc3205f2c048616efacchkuang *  that can be found in the LICENSE file in the root of the source
69b35249446b07f40ac5fcc3205f2c048616efacchkuang *  tree. An additional intellectual property rights grant can be found
79b35249446b07f40ac5fcc3205f2c048616efacchkuang *  in the file PATENTS.  All contributing project authors may
89b35249446b07f40ac5fcc3205f2c048616efacchkuang *  be found in the AUTHORS file in the root of the source tree.
99b35249446b07f40ac5fcc3205f2c048616efacchkuang */
109b35249446b07f40ac5fcc3205f2c048616efacchkuang
119b35249446b07f40ac5fcc3205f2c048616efacchkuang#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
129b35249446b07f40ac5fcc3205f2c048616efacchkuang#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
139b35249446b07f40ac5fcc3205f2c048616efacchkuang
149b35249446b07f40ac5fcc3205f2c048616efacchkuang#include <stdlib.h>
159b35249446b07f40ac5fcc3205f2c048616efacchkuang
169b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "./vp9_rtcd.h"
179b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_common.h"
189b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_onyxc_int.h"
199b35249446b07f40ac5fcc3205f2c048616efacchkuang
20b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian#ifdef __cplusplus
21b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianextern "C" {
22b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian#endif
23b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
249b35249446b07f40ac5fcc3205f2c048616efacchkuang#if HAVE_DSPR2
259b35249446b07f40ac5fcc3205f2c048616efacchkuang/* Processing 4 pixels at the same time: compute hev and mask in the same
 * function.
 *
 * Each pixel argument (p3..p0, q0..q3) is a 32-bit word that the .qb
 * instructions below handle as four separate unsigned byte lanes.  limit,
 * flimit and thresh are compared lane by lane in the same way (presumably the
 * caller replicates each threshold into all four bytes -- TODO confirm).
 *
 * Mind the argument order: p1, p0, p3, p2 and then q0, q1, q2, q3.
 *
 * Outputs, one byte per lane:
 *   *hev  - driven by the OR of (abs(p1 - p0) > thresh) and
 *           (abs(q1 - q0) > thresh): `ones` (0xFF) where set, 0 elsewhere.
 *   *mask - driven by the OR of all "> limit" tests and the flimit test:
 *           0 where any test fired, `ones` (0xFF) elsewhere.
 *   (This polarity assumes pick.qb takes its first source operand where the
 *   condition bit is set -- confirm against the MIPS DSP ASE manual.)
 *
 * Side effect: the two wrdsp instructions overwrite the DSPControl register.
 */
279b35249446b07f40ac5fcc3205f2c048616efacchkuangstatic INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
289b35249446b07f40ac5fcc3205f2c048616efacchkuang                                             uint32_t p1, uint32_t p0,
299b35249446b07f40ac5fcc3205f2c048616efacchkuang                                             uint32_t p3, uint32_t p2,
309b35249446b07f40ac5fcc3205f2c048616efacchkuang                                             uint32_t q0, uint32_t q1,
319b35249446b07f40ac5fcc3205f2c048616efacchkuang                                             uint32_t q2, uint32_t q3,
329b35249446b07f40ac5fcc3205f2c048616efacchkuang                                             uint32_t thresh, uint32_t *hev,
339b35249446b07f40ac5fcc3205f2c048616efacchkuang                                             uint32_t *mask) {
349b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  c, r, r3, r_k;
359b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  s1, s2, s3;
369b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  ones = 0xFFFFFFFF;
379b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  hev1;
389b35249446b07f40ac5fcc3205f2c048616efacchkuang
  /* First asm block: accumulate the per-lane compare results.
   *   r  - OR of every "> limit" test (later turned into *mask)
   *   r3 - OR of the two "> thresh" tests (later turned into *hev)
   * abs(a - b) is built as subu_s.qb(a, b) | subu_s.qb(b, a); this relies on
   * the saturating (_s) subtract clamping the negative direction to 0.
   * c and r_k are scratch registers reused by every test.
   */
399b35249446b07f40ac5fcc3205f2c048616efacchkuang  __asm__ __volatile__ (
409b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(p3 - p2) > limit) */
419b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[p3],     %[p2]        \n\t"
429b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[p2],     %[p3]        \n\t"
439b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k], %[r_k],    %[c]         \n\t"
449b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
      /* first test: "or" with $0 simply copies c into the accumulator r */
459b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],   $0,        %[c]         \n\t"
469b35249446b07f40ac5fcc3205f2c048616efacchkuang
479b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(p2 - p1) > limit) */
489b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[p2],     %[p1]        \n\t"
499b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[p1],     %[p2]        \n\t"
509b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k], %[r_k],    %[c]         \n\t"
519b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
529b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],   %[r],      %[c]         \n\t"
539b35249446b07f40ac5fcc3205f2c048616efacchkuang
549b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(p1 - p0) > limit)
559b35249446b07f40ac5fcc3205f2c048616efacchkuang       * hev  |= (abs(p1 - p0) > thresh)
       * (the same abs difference in r_k is compared twice; r3 is initialised
       * here by the copy-through-$0 idiom)
569b35249446b07f40ac5fcc3205f2c048616efacchkuang       */
579b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[p1],     %[p0]        \n\t"
589b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[p0],     %[p1]        \n\t"
599b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k], %[r_k],    %[c]         \n\t"
609b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[thresh], %[r_k]       \n\t"
619b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r3],  $0,        %[c]         \n\t"
629b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
639b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],   %[r],      %[c]         \n\t"
649b35249446b07f40ac5fcc3205f2c048616efacchkuang
659b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(q1 - q0) > limit)
669b35249446b07f40ac5fcc3205f2c048616efacchkuang       * hev  |= (abs(q1 - q0) > thresh)
679b35249446b07f40ac5fcc3205f2c048616efacchkuang       */
689b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[q1],     %[q0]        \n\t"
699b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[q0],     %[q1]        \n\t"
709b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k], %[r_k],    %[c]         \n\t"
719b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[thresh], %[r_k]       \n\t"
729b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r3],  %[r3],     %[c]         \n\t"
739b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
749b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],   %[r],      %[c]         \n\t"
759b35249446b07f40ac5fcc3205f2c048616efacchkuang
769b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(q2 - q1) > limit) */
779b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[q2],     %[q1]        \n\t"
789b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[q1],     %[q2]        \n\t"
799b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k], %[r_k],    %[c]         \n\t"
809b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
819b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],   %[r],      %[c]         \n\t"
      /* hev bits are complete: shift them up by 24 for the wrdsp in the next
       * asm block (NOTE(review): presumably the DSPControl condition-code
       * field starts at bit 24 -- confirm against the DSP ASE manual) */
829b35249446b07f40ac5fcc3205f2c048616efacchkuang      "sll            %[r3],    %[r3],    24          \n\t"
839b35249446b07f40ac5fcc3205f2c048616efacchkuang
849b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(q3 - q2) > limit) */
859b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[q3],     %[q2]        \n\t"
869b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[q2],     %[q3]        \n\t"
879b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k], %[r_k],    %[c]         \n\t"
889b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
899b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],   %[r],      %[c]         \n\t"
909b35249446b07f40ac5fcc3205f2c048616efacchkuang
      /* all outputs are early-clobber (&): they are written while the inputs
       * are still being read */
919b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [c] "=&r" (c), [r_k] "=&r" (r_k),
929b35249446b07f40ac5fcc3205f2c048616efacchkuang        [r] "=&r" (r), [r3] "=&r" (r3)
939b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2),
949b35249446b07f40ac5fcc3205f2c048616efacchkuang        [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0),
959b35249446b07f40ac5fcc3205f2c048616efacchkuang        [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh)
969b35249446b07f40ac5fcc3205f2c048616efacchkuang  );
979b35249446b07f40ac5fcc3205f2c048616efacchkuang
  /* Second asm block: add the combined p/q edge test to r, then turn the
   * compare bits in r3 and r into the byte-wide hev1 and s2 (mask) values
   * through wrdsp + pick.qb.
   */
989b35249446b07f40ac5fcc3205f2c048616efacchkuang  __asm__ __volatile__ (
999b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* abs(p0 - q0) */
1009b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[p0],     %[q0]        \n\t"
1019b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[q0],     %[p0]        \n\t"
      /* load the (already shifted) hev bits into DSPControl; the matching
       * pick.qb for hev1 follows a few instructions later */
1029b35249446b07f40ac5fcc3205f2c048616efacchkuang      "wrdsp          %[r3]                           \n\t"
1039b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[s1],  %[r_k],    %[c]         \n\t"
1049b35249446b07f40ac5fcc3205f2c048616efacchkuang
1059b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* abs(p1 - q1); s3 = abs(p0 - q0) * 2 via a saturating self-add */
1069b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],    %[p1],    %[q1]        \n\t"
1079b35249446b07f40ac5fcc3205f2c048616efacchkuang      "addu_s.qb      %[s3],   %[s1],    %[s1]        \n\t"
      /* hev1: `ones` or 0 per lane, chosen by the bits written by wrdsp */
1089b35249446b07f40ac5fcc3205f2c048616efacchkuang      "pick.qb        %[hev1], %[ones],  $0           \n\t"
1099b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],  %[q1],    %[p1]        \n\t"
1109b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[s2],   %[r_k],   %[c]         \n\t"
1119b35249446b07f40ac5fcc3205f2c048616efacchkuang
1129b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > flimit)
       * NOTE(review): this comment used to read "> flimit * 2 + limit", but
       * the compare below is against flimit alone; presumably the caller
       * passes the already combined value -- confirm.
       */
1139b35249446b07f40ac5fcc3205f2c048616efacchkuang      "shrl.qb        %[s2],   %[s2],     1           \n\t"
1149b35249446b07f40ac5fcc3205f2c048616efacchkuang      "addu_s.qb      %[s1],   %[s2],     %[s3]       \n\t"
1159b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],    %[flimit], %[s1]       \n\t"
1169b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],    %[r],      %[c]        \n\t"
1179b35249446b07f40ac5fcc3205f2c048616efacchkuang      "sll            %[r],    %[r],      24          \n\t"
1189b35249446b07f40ac5fcc3205f2c048616efacchkuang
      /* s2 (the mask): operands are swapped relative to the hev pick, so a
       * lane where some test fired gets 0 and every other lane gets `ones` */
1199b35249446b07f40ac5fcc3205f2c048616efacchkuang      "wrdsp          %[r]                            \n\t"
1209b35249446b07f40ac5fcc3205f2c048616efacchkuang      "pick.qb        %[s2],  $0,         %[ones]     \n\t"
1219b35249446b07f40ac5fcc3205f2c048616efacchkuang
      /* r is "+r": it carries the bits accumulated by the first asm block
       * in and is modified here */
1229b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1),
1239b35249446b07f40ac5fcc3205f2c048616efacchkuang        [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3)
1249b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3),
1259b35249446b07f40ac5fcc3205f2c048616efacchkuang        [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit)
1269b35249446b07f40ac5fcc3205f2c048616efacchkuang  );
1279b35249446b07f40ac5fcc3205f2c048616efacchkuang
1289b35249446b07f40ac5fcc3205f2c048616efacchkuang  *hev = hev1;
1299b35249446b07f40ac5fcc3205f2c048616efacchkuang  *mask = s2;
1309b35249446b07f40ac5fcc3205f2c048616efacchkuang}
1319b35249446b07f40ac5fcc3205f2c048616efacchkuang
/* Compute the hev, mask and flat selectors for 4 pixels at the same time.
 *
 * Each pixel argument (p3..p0, q0..q3) is a 32-bit word that the .qb
 * instructions below handle as four separate unsigned byte lanes.  limit,
 * flimit and thresh are compared lane by lane in the same way (presumably the
 * caller replicates each threshold into all four bytes -- TODO confirm).
 *
 * Mind the argument order: thresh comes third here, followed by
 * p1, p0, p3, p2 and then q0, q1, q2, q3.
 *
 * Outputs, one byte per lane:
 *   *hev  - driven by the OR of (abs(p1 - p0) > thresh) and
 *           (abs(q1 - q0) > thresh): `ones` (0xFF) where set, 0 elsewhere.
 *   *mask - driven by the OR of all "> limit" tests and the flimit test:
 *           0 where any test fired, `ones` (0xFF) elsewhere.
 *   *flat - driven by the OR of six "> flat_thresh" tests (p1/p0, q1/q0,
 *           p0/p2, q0/q2, p3/p0, q3/q0), with flat_thresh = 0x01010101:
 *           0 where any test fired, `ones` (0xFF) elsewhere.
 *   (This polarity assumes pick.qb takes its first source operand where the
 *   condition bit is set -- confirm against the MIPS DSP ASE manual.)
 *
 * Side effect: the three wrdsp instructions overwrite the DSPControl
 * register.
 */
1329b35249446b07f40ac5fcc3205f2c048616efacchkuangstatic INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit,
1339b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t flimit,
1349b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t thresh,
1359b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t p1, uint32_t p0,
1369b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t p3, uint32_t p2,
1379b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t q0, uint32_t q1,
1389b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t q2, uint32_t q3,
1399b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t *hev,
1409b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t *mask,
1419b35249446b07f40ac5fcc3205f2c048616efacchkuang                                                       uint32_t *flat) {
1429b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  c, r, r3, r_k, r_flat;
1439b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  s1, s2, s3;
1449b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  ones = 0xFFFFFFFF;
1459b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  flat_thresh = 0x01010101;
1469b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  hev1;
1479b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  flat1;
1489b35249446b07f40ac5fcc3205f2c048616efacchkuang
  /* First asm block: accumulate the per-lane compare results and resolve
   * flat1.
   *   r      - OR of every "> limit" test (later turned into *mask)
   *   r3     - OR of the two "> thresh" tests (later turned into *hev)
   *   r_flat - OR of the six "> flat_thresh" tests (turned into flat1 here)
   * abs(a - b) is built as subu_s.qb(a, b) | subu_s.qb(b, a); this relies on
   * the saturating (_s) subtract clamping the negative direction to 0.
   * c and r_k are scratch registers reused by every test.
   */
1499b35249446b07f40ac5fcc3205f2c048616efacchkuang  __asm__ __volatile__ (
1509b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(p3 - p2) > limit) */
1519b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[p3],          %[p2]        \n\t"
1529b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[p2],          %[p3]        \n\t"
1539b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
1549b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
      /* first test: "or" with $0 simply copies c into the accumulator r */
1559b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],       $0,             %[c]         \n\t"
1569b35249446b07f40ac5fcc3205f2c048616efacchkuang
1579b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(p2 - p1) > limit) */
1589b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[p2],          %[p1]        \n\t"
1599b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[p1],          %[p2]        \n\t"
1609b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
1619b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
1629b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],       %[r],           %[c]         \n\t"
1639b35249446b07f40ac5fcc3205f2c048616efacchkuang
1649b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(p1 - p0) > limit)
1659b35249446b07f40ac5fcc3205f2c048616efacchkuang       * hev  |= (abs(p1 - p0) > thresh)
1669b35249446b07f40ac5fcc3205f2c048616efacchkuang       * flat |= (abs(p1 - p0) > flat_thresh)
       * (one abs difference in r_k, compared three times; r3 and r_flat are
       * initialised here by the copy-through-$0 idiom)
1679b35249446b07f40ac5fcc3205f2c048616efacchkuang       */
1689b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[p1],          %[p0]        \n\t"
1699b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[p0],          %[p1]        \n\t"
1709b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
1719b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[thresh],      %[r_k]       \n\t"
1729b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r3],      $0,             %[c]         \n\t"
1739b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
1749b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],       %[r],           %[c]         \n\t"
1759b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
1769b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  $0,             %[c]         \n\t"
1779b35249446b07f40ac5fcc3205f2c048616efacchkuang
1789b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(q1 - q0) > limit)
1799b35249446b07f40ac5fcc3205f2c048616efacchkuang       * hev  |= (abs(q1 - q0) > thresh)
1809b35249446b07f40ac5fcc3205f2c048616efacchkuang       * flat |= (abs(q1 - q0) > flat_thresh)
1819b35249446b07f40ac5fcc3205f2c048616efacchkuang       */
1829b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[q1],          %[q0]        \n\t"
1839b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[q0],          %[q1]        \n\t"
1849b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
1859b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[thresh],      %[r_k]       \n\t"
1869b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r3],      %[r3],          %[c]         \n\t"
1879b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
1889b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],       %[r],           %[c]         \n\t"
1899b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
1909b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
1919b35249446b07f40ac5fcc3205f2c048616efacchkuang
1929b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(p0 - p2) > flat_thresh) */
1939b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[p0],          %[p2]        \n\t"
1949b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[p2],          %[p0]        \n\t"
1959b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
1969b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
1979b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
1989b35249446b07f40ac5fcc3205f2c048616efacchkuang
1999b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(q0 - q2) > flat_thresh) */
2009b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[q0],          %[q2]        \n\t"
2019b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[q2],          %[q0]        \n\t"
2029b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
2039b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
2049b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
2059b35249446b07f40ac5fcc3205f2c048616efacchkuang
2069b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(p3 - p0) > flat_thresh) */
2079b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[p3],          %[p0]        \n\t"
2089b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[p0],          %[p3]        \n\t"
2099b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
2109b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
2119b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
2129b35249446b07f40ac5fcc3205f2c048616efacchkuang
2139b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(q3 - q0) > flat_thresh) */
2149b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[q3],          %[q0]        \n\t"
2159b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[q0],          %[q3]        \n\t"
2169b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
2179b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
2189b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
      /* flat bits are complete: shift them up by 24 for wrdsp
       * (NOTE(review): presumably the DSPControl condition-code field starts
       * at bit 24 -- confirm against the DSP ASE manual) */
2199b35249446b07f40ac5fcc3205f2c048616efacchkuang      "sll            %[r_flat],  %[r_flat],      24           \n\t"
2209b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* look at stall here: wrdsp reads r_flat right after the sll that
       * writes it.  flat1 then gets 0 in every lane where a flat test fired
       * and `ones` elsewhere. */
2219b35249446b07f40ac5fcc3205f2c048616efacchkuang      "wrdsp          %[r_flat]                                \n\t"
2229b35249446b07f40ac5fcc3205f2c048616efacchkuang      "pick.qb        %[flat1],   $0,             %[ones]      \n\t"
2239b35249446b07f40ac5fcc3205f2c048616efacchkuang
2249b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(q2 - q1) > limit) */
2259b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[q2],          %[q1]        \n\t"
2269b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[q1],          %[q2]        \n\t"
2279b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
2289b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
2299b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],       %[r],           %[c]         \n\t"
      /* hev bits were completed above: shift them up by 24 for the wrdsp in
       * the next asm block */
2309b35249446b07f40ac5fcc3205f2c048616efacchkuang      "sll            %[r3],      %[r3],          24           \n\t"
2319b35249446b07f40ac5fcc3205f2c048616efacchkuang
2329b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(q3 - q2) > limit) */
2339b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[q3],          %[q2]        \n\t"
2349b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[q2],          %[q3]        \n\t"
2359b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
2369b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
2379b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],       %[r],           %[c]         \n\t"
2389b35249446b07f40ac5fcc3205f2c048616efacchkuang
      /* all outputs are early-clobber (&): they are written while the inputs
       * are still being read */
2399b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), [r3] "=&r" (r3),
2409b35249446b07f40ac5fcc3205f2c048616efacchkuang        [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1)
2419b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2),
2429b35249446b07f40ac5fcc3205f2c048616efacchkuang        [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0),
2439b35249446b07f40ac5fcc3205f2c048616efacchkuang        [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh),
2449b35249446b07f40ac5fcc3205f2c048616efacchkuang        [flat_thresh] "r" (flat_thresh), [ones] "r" (ones)
2459b35249446b07f40ac5fcc3205f2c048616efacchkuang  );
2469b35249446b07f40ac5fcc3205f2c048616efacchkuang
  /* Second asm block: add the combined p/q edge test to r, then turn the
   * compare bits in r3 and r into the byte-wide hev1 and s2 (mask) values
   * through wrdsp + pick.qb.
   */
2479b35249446b07f40ac5fcc3205f2c048616efacchkuang  __asm__ __volatile__ (
2489b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* abs(p0 - q0) */
2499b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[p0],     %[q0]        \n\t"
2509b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[q0],     %[p0]        \n\t"
      /* load the (already shifted) hev bits into DSPControl; the matching
       * pick.qb for hev1 follows a few instructions later */
2519b35249446b07f40ac5fcc3205f2c048616efacchkuang      "wrdsp          %[r3]                           \n\t"
2529b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[s1],  %[r_k],    %[c]         \n\t"
2539b35249446b07f40ac5fcc3205f2c048616efacchkuang
2549b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* abs(p1 - q1); s3 = abs(p0 - q0) * 2 via a saturating self-add */
2559b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],    %[p1],    %[q1]        \n\t"
2569b35249446b07f40ac5fcc3205f2c048616efacchkuang      "addu_s.qb      %[s3],   %[s1],    %[s1]        \n\t"
      /* hev1: `ones` or 0 per lane, chosen by the bits written by wrdsp */
2579b35249446b07f40ac5fcc3205f2c048616efacchkuang      "pick.qb        %[hev1], %[ones],  $0           \n\t"
2589b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],  %[q1],    %[p1]        \n\t"
2599b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[s2],   %[r_k],   %[c]         \n\t"
2609b35249446b07f40ac5fcc3205f2c048616efacchkuang
2619b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > flimit)
       * NOTE(review): this comment used to read "> flimit * 2 + limit", but
       * the compare below is against flimit alone; presumably the caller
       * passes the already combined value -- confirm.
       */
2629b35249446b07f40ac5fcc3205f2c048616efacchkuang      "shrl.qb        %[s2],   %[s2],     1           \n\t"
2639b35249446b07f40ac5fcc3205f2c048616efacchkuang      "addu_s.qb      %[s1],   %[s2],     %[s3]       \n\t"
2649b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],    %[flimit], %[s1]       \n\t"
2659b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],    %[r],      %[c]        \n\t"
2669b35249446b07f40ac5fcc3205f2c048616efacchkuang      "sll            %[r],    %[r],      24          \n\t"
2679b35249446b07f40ac5fcc3205f2c048616efacchkuang
      /* s2 (the mask): operands are swapped relative to the hev pick, so a
       * lane where some test fired gets 0 and every other lane gets `ones` */
2689b35249446b07f40ac5fcc3205f2c048616efacchkuang      "wrdsp          %[r]                            \n\t"
2699b35249446b07f40ac5fcc3205f2c048616efacchkuang      "pick.qb        %[s2],   $0,        %[ones]     \n\t"
2709b35249446b07f40ac5fcc3205f2c048616efacchkuang
      /* r is "+r": it carries the bits accumulated by the first asm block
       * in and is modified here */
2719b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1),
2729b35249446b07f40ac5fcc3205f2c048616efacchkuang        [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3)
2739b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3),
2749b35249446b07f40ac5fcc3205f2c048616efacchkuang        [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit)
2759b35249446b07f40ac5fcc3205f2c048616efacchkuang  );
2769b35249446b07f40ac5fcc3205f2c048616efacchkuang
2779b35249446b07f40ac5fcc3205f2c048616efacchkuang  *hev = hev1;
2789b35249446b07f40ac5fcc3205f2c048616efacchkuang  *mask = s2;
2799b35249446b07f40ac5fcc3205f2c048616efacchkuang  *flat = flat1;
2809b35249446b07f40ac5fcc3205f2c048616efacchkuang}
2819b35249446b07f40ac5fcc3205f2c048616efacchkuang
2829b35249446b07f40ac5fcc3205f2c048616efacchkuangstatic INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3,
2839b35249446b07f40ac5fcc3205f2c048616efacchkuang                                 uint32_t p2, uint32_t p1,
2849b35249446b07f40ac5fcc3205f2c048616efacchkuang                                 uint32_t p0, uint32_t q0,
2859b35249446b07f40ac5fcc3205f2c048616efacchkuang                                 uint32_t q1, uint32_t q2,
2869b35249446b07f40ac5fcc3205f2c048616efacchkuang                                 uint32_t q3, uint32_t q4,
2879b35249446b07f40ac5fcc3205f2c048616efacchkuang                                 uint32_t *flat2) {
2889b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  c, r, r_k, r_flat;
2899b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  ones = 0xFFFFFFFF;
2909b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  flat_thresh = 0x01010101;
2919b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  flat1, flat3;
2929b35249446b07f40ac5fcc3205f2c048616efacchkuang
2939b35249446b07f40ac5fcc3205f2c048616efacchkuang  __asm__ __volatile__ (
2949b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(p4 - p0) > thresh) */
2959b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],   %[p4],           %[p0]        \n\t"
2969b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k], %[p0],           %[p4]        \n\t"
2979b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k], %[r_k],          %[c]         \n\t"
2989b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],   %[flat_thresh],  %[r_k]       \n\t"
2999b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],   $0,              %[c]         \n\t"
3009b35249446b07f40ac5fcc3205f2c048616efacchkuang
3019b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(q4 - q0) > thresh) */
3029b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],     %[q4],           %[q0]     \n\t"
3039b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],   %[q0],           %[q4]     \n\t"
3049b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],   %[r_k],          %[c]      \n\t"
3059b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],     %[flat_thresh],  %[r_k]    \n\t"
3069b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r],     %[r],            %[c]      \n\t"
3079b35249446b07f40ac5fcc3205f2c048616efacchkuang      "sll            %[r],     %[r],            24        \n\t"
3089b35249446b07f40ac5fcc3205f2c048616efacchkuang      "wrdsp          %[r]                                 \n\t"
3099b35249446b07f40ac5fcc3205f2c048616efacchkuang      "pick.qb        %[flat3], $0,           %[ones]      \n\t"
3109b35249446b07f40ac5fcc3205f2c048616efacchkuang
3119b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(p1 - p0) > thresh) */
3129b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[p1],          %[p0]        \n\t"
3139b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[p0],          %[p1]        \n\t"
3149b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
3159b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
3169b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  $0,             %[c]         \n\t"
3179b35249446b07f40ac5fcc3205f2c048616efacchkuang
3189b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(q1 - q0) > thresh) */
3199b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],      %[q1],           %[q0]        \n\t"
3209b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],    %[q0],           %[q1]        \n\t"
3219b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],    %[r_k],          %[c]         \n\t"
3229b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],      %[flat_thresh],  %[r_k]       \n\t"
3239b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat], %[r_flat],       %[c]         \n\t"
3249b35249446b07f40ac5fcc3205f2c048616efacchkuang
3259b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(p0 - p2) > thresh) */
3269b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[p0],          %[p2]        \n\t"
3279b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[p2],          %[p0]        \n\t"
3289b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
3299b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
3309b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
3319b35249446b07f40ac5fcc3205f2c048616efacchkuang
3329b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(q0 - q2) > thresh) */
3339b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[q0],          %[q2]        \n\t"
3349b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[q2],          %[q0]        \n\t"
3359b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
3369b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
3379b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
3389b35249446b07f40ac5fcc3205f2c048616efacchkuang
3399b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(p3 - p0) > thresh) */
3409b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[p3],          %[p0]        \n\t"
3419b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[p0],          %[p3]        \n\t"
3429b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
3439b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
3449b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
3459b35249446b07f40ac5fcc3205f2c048616efacchkuang
3469b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat |= (abs(q3 - q0) > thresh) */
3479b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[c],       %[q3],          %[q0]        \n\t"
3489b35249446b07f40ac5fcc3205f2c048616efacchkuang      "subu_s.qb      %[r_k],     %[q0],          %[q3]        \n\t"
3499b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_k],     %[r_k],         %[c]         \n\t"
3509b35249446b07f40ac5fcc3205f2c048616efacchkuang      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
3519b35249446b07f40ac5fcc3205f2c048616efacchkuang      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
3529b35249446b07f40ac5fcc3205f2c048616efacchkuang      "sll            %[r_flat],  %[r_flat],      24           \n\t"
3539b35249446b07f40ac5fcc3205f2c048616efacchkuang      "wrdsp          %[r_flat]                                \n\t"
3549b35249446b07f40ac5fcc3205f2c048616efacchkuang      "pick.qb        %[flat1],   $0,             %[ones]      \n\t"
3559b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */
3569b35249446b07f40ac5fcc3205f2c048616efacchkuang      "and            %[flat1],  %[flat3],        %[flat1]     \n\t"
3579b35249446b07f40ac5fcc3205f2c048616efacchkuang
3589b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r),
3599b35249446b07f40ac5fcc3205f2c048616efacchkuang        [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1), [flat3] "=&r" (flat3)
3609b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2),
3619b35249446b07f40ac5fcc3205f2c048616efacchkuang        [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1),
3629b35249446b07f40ac5fcc3205f2c048616efacchkuang        [q2] "r" (q2), [q3] "r" (q3), [q4] "r" (q4),
3639b35249446b07f40ac5fcc3205f2c048616efacchkuang        [flat_thresh] "r" (flat_thresh), [ones] "r" (ones)
3649b35249446b07f40ac5fcc3205f2c048616efacchkuang  );
3659b35249446b07f40ac5fcc3205f2c048616efacchkuang
3669b35249446b07f40ac5fcc3205f2c048616efacchkuang  *flat2 = flat1;
3679b35249446b07f40ac5fcc3205f2c048616efacchkuang}
3689b35249446b07f40ac5fcc3205f2c048616efacchkuang#endif  // #if HAVE_DSPR2
369b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian#ifdef __cplusplus
370b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}  // extern "C"
371b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian#endif
372b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
3739b35249446b07f40ac5fcc3205f2c048616efacchkuang#endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
374