1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/*
2233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
4233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  Use of this source code is governed by a BSD-style license
5233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  that can be found in the LICENSE file in the root of the source
6233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  tree. An additional intellectual property rights grant can be found
7233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  in the file PATENTS.  All contributing project authors may
8233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  be found in the AUTHORS file in the root of the source tree.
9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#ifndef VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#define VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include <stdlib.h>
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vp9_rtcd.h"
17233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_common.h"
18233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_onyxc_int.h"
19233d2500723e5594f3e7c70896ffeeef32b9c950ywan
20233d2500723e5594f3e7c70896ffeeef32b9c950ywan#ifdef __cplusplus
21233d2500723e5594f3e7c70896ffeeef32b9c950ywanextern "C" {
22233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif
23233d2500723e5594f3e7c70896ffeeef32b9c950ywan
24233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if HAVE_DSPR2
25233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* processing 4 pixels at the same time
26233d2500723e5594f3e7c70896ffeeef32b9c950ywan * compute hev and mask in the same function
 *
 * Every uint32_t pixel/threshold argument is consumed by .qb instructions,
 * i.e. as four packed unsigned bytes, so one call evaluates four filter
 * positions (one per byte lane).
 *
 * Note the argument order: p1, p0, p3, p2, then q0, q1, q2, q3.
 *
 * limit   bound applied to |p3-p2|, |p2-p1|, |p1-p0|, |q1-q0|, |q2-q1|
 *         and |q3-q2|.
 * flimit  bound applied to |p0-q0| * 2 + |p1-q1| / 2; it is compared exactly
 *         as passed in.
 * thresh  bound applied to |p1-p0| and |q1-q0| for the hev test.
 * hev     out: lane is 0xFF where |p1-p0| > thresh or |q1-q0| > thresh,
 *         0x00 elsewhere.
 * mask    out: lane is 0x00 where any limit/flimit comparison was exceeded,
 *         0xFF elsewhere.
 *
 * NOTE(review): both asm statements below rewrite DSPControl through wrdsp
 * but declare no clobber for it, and wrdsp is issued without a field mask --
 * confirm which DSPControl fields are overwritten and that nothing else
 * relies on them across this call.
 */
27233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic INLINE void vp9_filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
28233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             uint32_t p1, uint32_t p0,
29233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             uint32_t p3, uint32_t p2,
30233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             uint32_t q0, uint32_t q1,
31233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             uint32_t q2, uint32_t q3,
32233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             uint32_t thresh, uint32_t *hev,
33233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                             uint32_t *mask) {
  /* c, r_k: scratch; r: accumulated mask bits; r3: accumulated hev bits */
34233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  c, r, r3, r_k;
35233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  s1, s2, s3;
  /* all-ones source operand for the pick.qb byte selects */
36233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  ones = 0xFFFFFFFF;
37233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  hev1;
38233d2500723e5594f3e7c70896ffeeef32b9c950ywan
39233d2500723e5594f3e7c70896ffeeef32b9c950ywan  __asm__ __volatile__ (
      /* Idiom used for every test below: abs(a - b) per byte lane is built
       * as subu_s.qb(a, b) | subu_s.qb(b, a) (unsigned saturating subtract,
       * so the direction that would go negative yields 0).  cmpgu.lt.qb
       * then leaves one result bit per lane in its destination register,
       * set where bound < abs value.
       */
40233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(p3 - p2) > limit) */
41233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[p3],     %[p2]        \n\t"
42233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[p2],     %[p3]        \n\t"
43233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k], %[r_k],    %[c]         \n\t"
44233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
      /* first test: initialise the mask accumulator r */
45233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],   $0,        %[c]         \n\t"
46233d2500723e5594f3e7c70896ffeeef32b9c950ywan
47233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(p2 - p1) > limit) */
48233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[p2],     %[p1]        \n\t"
49233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[p1],     %[p2]        \n\t"
50233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k], %[r_k],    %[c]         \n\t"
51233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
52233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],   %[r],      %[c]         \n\t"
53233d2500723e5594f3e7c70896ffeeef32b9c950ywan
54233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(p1 - p0) > limit)
55233d2500723e5594f3e7c70896ffeeef32b9c950ywan       * hev  |= (abs(p1 - p0) > thresh)
56233d2500723e5594f3e7c70896ffeeef32b9c950ywan       */
57233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[p1],     %[p0]        \n\t"
58233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[p0],     %[p1]        \n\t"
59233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k], %[r_k],    %[c]         \n\t"
60233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[thresh], %[r_k]       \n\t"
      /* first hev test: initialise the hev accumulator r3 */
61233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r3],  $0,        %[c]         \n\t"
62233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
63233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],   %[r],      %[c]         \n\t"
64233d2500723e5594f3e7c70896ffeeef32b9c950ywan
65233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(q1 - q0) > limit)
66233d2500723e5594f3e7c70896ffeeef32b9c950ywan       * hev  |= (abs(q1 - q0) > thresh)
67233d2500723e5594f3e7c70896ffeeef32b9c950ywan       */
68233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[q1],     %[q0]        \n\t"
69233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[q0],     %[q1]        \n\t"
70233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k], %[r_k],    %[c]         \n\t"
71233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[thresh], %[r_k]       \n\t"
72233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r3],  %[r3],     %[c]         \n\t"
73233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
74233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],   %[r],      %[c]         \n\t"
75233d2500723e5594f3e7c70896ffeeef32b9c950ywan
76233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(q2 - q1) > limit) */
77233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[q2],     %[q1]        \n\t"
78233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[q1],     %[q2]        \n\t"
79233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k], %[r_k],    %[c]         \n\t"
80233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
81233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],   %[r],      %[c]         \n\t"
      /* hev is complete: move its per-lane bits up to bits 24..27 so the
       * wrdsp in the next asm statement places them in the DSPControl
       * condition-code field that pick.qb reads */
82233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "sll            %[r3],    %[r3],    24          \n\t"
83233d2500723e5594f3e7c70896ffeeef32b9c950ywan
84233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(q3 - q2) > limit) */
85233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[q3],     %[q2]        \n\t"
86233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[q2],     %[q3]        \n\t"
87233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k], %[r_k],    %[c]         \n\t"
88233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[limit],  %[r_k]       \n\t"
89233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],   %[r],      %[c]         \n\t"
90233d2500723e5594f3e7c70896ffeeef32b9c950ywan
      /* outputs are early-clobber ("=&r"): they are written while input
       * registers are still needed by later instructions */
91233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [c] "=&r" (c), [r_k] "=&r" (r_k),
92233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [r] "=&r" (r), [r3] "=&r" (r3)
93233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2),
94233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0),
95233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh)
96233d2500723e5594f3e7c70896ffeeef32b9c950ywan  );
97233d2500723e5594f3e7c70896ffeeef32b9c950ywan
  /* Second stage: add the p0/q0, p1/q1 test to the mask bits in r and
   * expand both hev (r3) and mask (r) from per-lane bits to per-lane bytes. */
98233d2500723e5594f3e7c70896ffeeef32b9c950ywan  __asm__ __volatile__ (
99233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* abs(p0 - q0) */
100233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[p0],     %[q0]        \n\t"
101233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[q0],     %[p0]        \n\t"
      /* load the (pre-shifted) hev bits into DSPControl for pick.qb below */
102233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "wrdsp          %[r3]                           \n\t"
103233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[s1],  %[r_k],    %[c]         \n\t"
104233d2500723e5594f3e7c70896ffeeef32b9c950ywan
105233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* abs(p1 - q1) */
106233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],    %[p1],    %[q1]        \n\t"
      /* s3 = abs(p0 - q0) * 2, saturating per byte */
107233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "addu_s.qb      %[s3],   %[s1],    %[s1]        \n\t"
      /* hev1: 0xFF in lanes whose hev bit is set, 0x00 elsewhere */
108233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "pick.qb        %[hev1], %[ones],  $0           \n\t"
109233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],  %[q1],    %[p1]        \n\t"
110233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[s2],   %[r_k],   %[c]         \n\t"
111233d2500723e5594f3e7c70896ffeeef32b9c950ywan
112233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > flimit)
       * flimit is compared exactly as passed in; any combining of limits
       * (e.g. flimit * 2 + limit) is presumably done by the caller --
       * TODO confirm.
       */
113233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "shrl.qb        %[s2],   %[s2],     1           \n\t"
114233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "addu_s.qb      %[s1],   %[s2],     %[s3]       \n\t"
115233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],    %[flimit], %[s1]       \n\t"
116233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],    %[r],      %[c]        \n\t"
      /* mask bits complete: shift them to bits 24..27 for wrdsp */
117233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "sll            %[r],    %[r],      24          \n\t"
118233d2500723e5594f3e7c70896ffeeef32b9c950ywan
119233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "wrdsp          %[r]                            \n\t"
      /* s2: 0x00 in lanes whose mask bit is set, 0xFF elsewhere */
120233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "pick.qb        %[s2],  $0,         %[ones]     \n\t"
121233d2500723e5594f3e7c70896ffeeef32b9c950ywan
      /* r is read-write ("+r"): it carries the mask bits over from the
       * first asm statement */
122233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1),
123233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3)
124233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3),
125233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit)
126233d2500723e5594f3e7c70896ffeeef32b9c950ywan  );
127233d2500723e5594f3e7c70896ffeeef32b9c950ywan
128233d2500723e5594f3e7c70896ffeeef32b9c950ywan  *hev = hev1;
129233d2500723e5594f3e7c70896ffeeef32b9c950ywan  *mask = s2;
130233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
131233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Compute the hev, filter mask and flat mask for four byte lanes at once.
 *
 * Every uint32_t pixel/threshold argument is consumed by .qb instructions,
 * i.e. as four packed unsigned bytes (one filter position per lane).
 *
 * Note the argument order: limit, flimit, thresh, then p1, p0, p3, p2,
 * then q0, q1, q2, q3.
 *
 * limit   bound applied to |p3-p2|, |p2-p1|, |p1-p0|, |q1-q0|, |q2-q1|
 *         and |q3-q2|.
 * flimit  bound applied to |p0-q0| * 2 + |p1-q1| / 2; compared as passed.
 * thresh  bound applied to |p1-p0| and |q1-q0| for the hev test.
 * hev     out: lane is 0xFF where |p1-p0| > thresh or |q1-q0| > thresh,
 *         0x00 elsewhere.
 * mask    out: lane is 0x00 where any limit/flimit comparison was exceeded,
 *         0xFF elsewhere.
 * flat    out: lane is 0x00 where any of |p1-p0|, |q1-q0|, |p0-p2|, |q0-q2|,
 *         |p3-p0|, |q3-q0| exceeds flat_thresh (1 per byte), 0xFF elsewhere.
 *
 * NOTE(review): the asm statements rewrite DSPControl through wrdsp (used
 * without a field mask) and declare no clobber for it -- confirm this is
 * safe for surrounding code.
 */
132233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic INLINE void vp9_filter_hev_mask_flatmask4_dspr2(uint32_t limit,
133233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t flimit,
134233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t thresh,
135233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t p1, uint32_t p0,
136233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t p3, uint32_t p2,
137233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t q0, uint32_t q1,
138233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t q2, uint32_t q3,
139233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t *hev,
140233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t *mask,
141233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                                       uint32_t *flat) {
  /* c, r_k: scratch; r: mask bits; r3: hev bits; r_flat: flat bits */
142233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  c, r, r3, r_k, r_flat;
143233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  s1, s2, s3;
  /* all-ones source operand for the pick.qb byte selects */
144233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  ones = 0xFFFFFFFF;
  /* flatness bound: the value 1 replicated into each of the four bytes */
145233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  flat_thresh = 0x01010101;
146233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  hev1;
147233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  flat1;
148233d2500723e5594f3e7c70896ffeeef32b9c950ywan
149233d2500723e5594f3e7c70896ffeeef32b9c950ywan  __asm__ __volatile__ (
      /* Idiom used for every test below: abs(a - b) per byte lane is built
       * as subu_s.qb(a, b) | subu_s.qb(b, a); cmpgu.lt.qb then leaves one
       * result bit per lane in its destination, set where bound < abs value.
       */
150233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(p3 - p2) > limit) */
151233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[p3],          %[p2]        \n\t"
152233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[p2],          %[p3]        \n\t"
153233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
154233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
155233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],       $0,             %[c]         \n\t"
156233d2500723e5594f3e7c70896ffeeef32b9c950ywan
157233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(p2 - p1) > limit) */
158233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[p2],          %[p1]        \n\t"
159233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[p1],          %[p2]        \n\t"
160233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
161233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
162233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],       %[r],           %[c]         \n\t"
163233d2500723e5594f3e7c70896ffeeef32b9c950ywan
164233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(p1 - p0) > limit)
165233d2500723e5594f3e7c70896ffeeef32b9c950ywan       * hev  |= (abs(p1 - p0) > thresh)
166233d2500723e5594f3e7c70896ffeeef32b9c950ywan       * flat |= (abs(p1 - p0) > flat_thresh)
167233d2500723e5594f3e7c70896ffeeef32b9c950ywan       */
168233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[p1],          %[p0]        \n\t"
169233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[p0],          %[p1]        \n\t"
170233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
171233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[thresh],      %[r_k]       \n\t"
172233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r3],      $0,             %[c]         \n\t"
173233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
174233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],       %[r],           %[c]         \n\t"
175233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
176233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  $0,             %[c]         \n\t"
177233d2500723e5594f3e7c70896ffeeef32b9c950ywan
178233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(q1 - q0) > limit)
179233d2500723e5594f3e7c70896ffeeef32b9c950ywan       * hev  |= (abs(q1 - q0) > thresh)
180233d2500723e5594f3e7c70896ffeeef32b9c950ywan       * flat |= (abs(q1 - q0) > flat_thresh)
181233d2500723e5594f3e7c70896ffeeef32b9c950ywan       */
182233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[q1],          %[q0]        \n\t"
183233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[q0],          %[q1]        \n\t"
184233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
185233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[thresh],      %[r_k]       \n\t"
186233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r3],      %[r3],          %[c]         \n\t"
187233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
188233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],       %[r],           %[c]         \n\t"
189233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
190233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
191233d2500723e5594f3e7c70896ffeeef32b9c950ywan
192233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(p0 - p2) > flat_thresh) */
193233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[p0],          %[p2]        \n\t"
194233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[p2],          %[p0]        \n\t"
195233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
196233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
197233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
198233d2500723e5594f3e7c70896ffeeef32b9c950ywan
199233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(q0 - q2) > flat_thresh) */
200233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[q0],          %[q2]        \n\t"
201233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[q2],          %[q0]        \n\t"
202233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
203233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
204233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
205233d2500723e5594f3e7c70896ffeeef32b9c950ywan
206233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(p3 - p0) > flat_thresh) */
207233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[p3],          %[p0]        \n\t"
208233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[p0],          %[p3]        \n\t"
209233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
210233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
211233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
212233d2500723e5594f3e7c70896ffeeef32b9c950ywan
213233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(q3 - q0) > flat_thresh) */
214233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[q3],          %[q0]        \n\t"
215233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[q0],          %[q3]        \n\t"
216233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
217233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
218233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
      /* flat bits complete: shift to bits 24..27, load them into DSPControl
       * and expand to bytes (0x00 where a flat bit is set, 0xFF elsewhere) */
219233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "sll            %[r_flat],  %[r_flat],      24           \n\t"
220233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* look at stall here */
221233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "wrdsp          %[r_flat]                                \n\t"
222233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "pick.qb        %[flat1],   $0,             %[ones]      \n\t"
223233d2500723e5594f3e7c70896ffeeef32b9c950ywan
224233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(q2 - q1) > limit) */
225233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[q2],          %[q1]        \n\t"
226233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[q1],          %[q2]        \n\t"
227233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
228233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
229233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],       %[r],           %[c]         \n\t"
      /* hev bits to bits 24..27, ready for the wrdsp in the next asm */
230233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "sll            %[r3],      %[r3],          24           \n\t"
231233d2500723e5594f3e7c70896ffeeef32b9c950ywan
232233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(q3 - q2) > limit) */
233233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[q3],          %[q2]        \n\t"
234233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[q2],          %[q3]        \n\t"
235233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
236233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[limit],       %[r_k]       \n\t"
237233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],       %[r],           %[c]         \n\t"
238233d2500723e5594f3e7c70896ffeeef32b9c950ywan
      /* outputs are early-clobber ("=&r"): they are written while input
       * registers are still needed by later instructions */
239233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r), [r3] "=&r" (r3),
240233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1)
241233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [limit] "r" (limit), [p3] "r" (p3), [p2] "r" (p2),
242233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [p1] "r" (p1), [p0] "r" (p0), [q1] "r" (q1), [q0] "r" (q0),
243233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [q2] "r" (q2), [q3] "r" (q3), [thresh] "r" (thresh),
244233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [flat_thresh] "r" (flat_thresh), [ones] "r" (ones)
245233d2500723e5594f3e7c70896ffeeef32b9c950ywan  );
246233d2500723e5594f3e7c70896ffeeef32b9c950ywan
  /* Second stage: add the p0/q0, p1/q1 test to the mask bits in r and
   * expand both hev (r3) and mask (r) from per-lane bits to per-lane bytes. */
247233d2500723e5594f3e7c70896ffeeef32b9c950ywan  __asm__ __volatile__ (
248233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* abs(p0 - q0) */
249233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[p0],     %[q0]        \n\t"
250233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[q0],     %[p0]        \n\t"
      /* load the (pre-shifted) hev bits into DSPControl for pick.qb below */
251233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "wrdsp          %[r3]                           \n\t"
252233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[s1],  %[r_k],    %[c]         \n\t"
253233d2500723e5594f3e7c70896ffeeef32b9c950ywan
254233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* abs(p1 - q1) */
255233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],    %[p1],    %[q1]        \n\t"
      /* s3 = abs(p0 - q0) * 2, saturating per byte */
256233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "addu_s.qb      %[s3],   %[s1],    %[s1]        \n\t"
      /* hev1: 0xFF in lanes whose hev bit is set, 0x00 elsewhere */
257233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "pick.qb        %[hev1], %[ones],  $0           \n\t"
258233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],  %[q1],    %[p1]        \n\t"
259233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[s2],   %[r_k],   %[c]         \n\t"
260233d2500723e5594f3e7c70896ffeeef32b9c950ywan
261233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > flimit)
       * flimit is compared exactly as passed in; any combining of limits
       * (e.g. flimit * 2 + limit) is presumably done by the caller --
       * TODO confirm.
       */
262233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "shrl.qb        %[s2],   %[s2],     1           \n\t"
263233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "addu_s.qb      %[s1],   %[s2],     %[s3]       \n\t"
264233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],    %[flimit], %[s1]       \n\t"
265233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],    %[r],      %[c]        \n\t"
      /* mask bits complete: shift them to bits 24..27 for wrdsp */
266233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "sll            %[r],    %[r],      24          \n\t"
267233d2500723e5594f3e7c70896ffeeef32b9c950ywan
268233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "wrdsp          %[r]                            \n\t"
      /* s2: 0x00 in lanes whose mask bit is set, 0xFF elsewhere */
269233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "pick.qb        %[s2],   $0,        %[ones]     \n\t"
270233d2500723e5594f3e7c70896ffeeef32b9c950ywan
      /* r is read-write ("+r"): it carries the mask bits over from the
       * first asm statement */
271233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [c] "=&r" (c), [r_k] "=&r" (r_k), [s1] "=&r" (s1), [hev1] "=&r" (hev1),
272233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [s2] "=&r" (s2), [r] "+r" (r), [s3] "=&r" (s3)
273233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [p0] "r" (p0), [q0] "r" (q0), [p1] "r" (p1), [r3] "r" (r3),
274233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [q1] "r" (q1), [ones] "r" (ones), [flimit] "r" (flimit)
275233d2500723e5594f3e7c70896ffeeef32b9c950ywan  );
276233d2500723e5594f3e7c70896ffeeef32b9c950ywan
277233d2500723e5594f3e7c70896ffeeef32b9c950ywan  *hev = hev1;
278233d2500723e5594f3e7c70896ffeeef32b9c950ywan  *mask = s2;
279233d2500723e5594f3e7c70896ffeeef32b9c950ywan  *flat = flat1;
280233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
281233d2500723e5594f3e7c70896ffeeef32b9c950ywan
/* Compute a flat mask over p4..q4 for four byte lanes at once.
 *
 * Every uint32_t pixel argument is consumed by .qb instructions, i.e. as
 * four packed unsigned bytes (one filter position per lane).
 *
 * Two groups of differences are tested against flat_thresh (1 per byte):
 *   - |p4-p0| and |q4-q0|                                    -> flat3
 *   - |p1-p0|, |q1-q0|, |p0-p2|, |q0-q2|, |p3-p0|, |q3-q0|   -> flat1
 * Each intermediate mask is 0x00 in lanes where one of its differences
 * exceeds the bound and 0xFF elsewhere.
 *
 * flat2   out: flat3 & flat1, i.e. 0xFF only in lanes where none of the
 *         eight differences exceeds flat_thresh.
 *
 * NOTE(review): the asm rewrites DSPControl through wrdsp (used without a
 * field mask) and declares no clobber for it -- confirm this is safe for
 * surrounding code.
 */
282233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic INLINE void vp9_flatmask5(uint32_t p4, uint32_t p3,
283233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 uint32_t p2, uint32_t p1,
284233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 uint32_t p0, uint32_t q0,
285233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 uint32_t q1, uint32_t q2,
286233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 uint32_t q3, uint32_t q4,
287233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 uint32_t *flat2) {
  /* c, r_k: scratch; r: p4/q4 test bits; r_flat: p3..q3 test bits */
288233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  c, r, r_k, r_flat;
  /* all-ones source operand for the pick.qb byte selects */
289233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  ones = 0xFFFFFFFF;
  /* flatness bound: the value 1 replicated into each of the four bytes */
290233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  flat_thresh = 0x01010101;
291233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  flat1, flat3;
292233d2500723e5594f3e7c70896ffeeef32b9c950ywan
293233d2500723e5594f3e7c70896ffeeef32b9c950ywan  __asm__ __volatile__ (
      /* Idiom used for every test below: abs(a - b) per byte lane is built
       * as subu_s.qb(a, b) | subu_s.qb(b, a); cmpgu.lt.qb then leaves one
       * result bit per lane in its destination, set where bound < abs value.
       */
294233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(p4 - p0) > flat_thresh) */
295233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],   %[p4],           %[p0]        \n\t"
296233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k], %[p0],           %[p4]        \n\t"
297233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k], %[r_k],          %[c]         \n\t"
298233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],   %[flat_thresh],  %[r_k]       \n\t"
299233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],   $0,              %[c]         \n\t"
300233d2500723e5594f3e7c70896ffeeef32b9c950ywan
301233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(q4 - q0) > flat_thresh) */
302233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],     %[q4],           %[q0]     \n\t"
303233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],   %[q0],           %[q4]     \n\t"
304233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],   %[r_k],          %[c]      \n\t"
305233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],     %[flat_thresh],  %[r_k]    \n\t"
306233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r],     %[r],            %[c]      \n\t"
      /* shift the result bits to bits 24..27, load them into DSPControl and
       * expand to bytes: flat3 is 0x00 where a bit is set, 0xFF elsewhere */
307233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "sll            %[r],     %[r],            24        \n\t"
308233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "wrdsp          %[r]                                 \n\t"
309233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "pick.qb        %[flat3], $0,           %[ones]      \n\t"
310233d2500723e5594f3e7c70896ffeeef32b9c950ywan
311233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(p1 - p0) > flat_thresh) */
312233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[p1],          %[p0]        \n\t"
313233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[p0],          %[p1]        \n\t"
314233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
315233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
316233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  $0,             %[c]         \n\t"
317233d2500723e5594f3e7c70896ffeeef32b9c950ywan
318233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(q1 - q0) > flat_thresh) */
319233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],      %[q1],           %[q0]        \n\t"
320233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],    %[q0],           %[q1]        \n\t"
321233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],    %[r_k],          %[c]         \n\t"
322233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],      %[flat_thresh],  %[r_k]       \n\t"
323233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat], %[r_flat],       %[c]         \n\t"
324233d2500723e5594f3e7c70896ffeeef32b9c950ywan
325233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(p0 - p2) > flat_thresh) */
326233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[p0],          %[p2]        \n\t"
327233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[p2],          %[p0]        \n\t"
328233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
329233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
330233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
331233d2500723e5594f3e7c70896ffeeef32b9c950ywan
332233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(q0 - q2) > flat_thresh) */
333233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[q0],          %[q2]        \n\t"
334233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[q2],          %[q0]        \n\t"
335233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
336233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
337233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
338233d2500723e5594f3e7c70896ffeeef32b9c950ywan
339233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(p3 - p0) > flat_thresh) */
340233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[p3],          %[p0]        \n\t"
341233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[p0],          %[p3]        \n\t"
342233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
343233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
344233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
345233d2500723e5594f3e7c70896ffeeef32b9c950ywan
346233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* flat |= (abs(q3 - q0) > flat_thresh) */
347233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[c],       %[q3],          %[q0]        \n\t"
348233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "subu_s.qb      %[r_k],     %[q0],          %[q3]        \n\t"
349233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_k],     %[r_k],         %[c]         \n\t"
350233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "cmpgu.lt.qb    %[c],       %[flat_thresh], %[r_k]       \n\t"
351233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "or             %[r_flat],  %[r_flat],      %[c]         \n\t"
      /* same bit-to-byte expansion for the second group: flat1 is 0x00
       * where a bit is set, 0xFF elsewhere */
352233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "sll            %[r_flat],  %[r_flat],      24           \n\t"
353233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "wrdsp          %[r_flat]                                \n\t"
354233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "pick.qb        %[flat1],   $0,             %[ones]      \n\t"
355233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* combine: flat1 = flat3 (p4/q4 test) & flat1 (flatmask4-style test
       * over p3, p2, p1, p0, q0, q1, q2, q3) */
356233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "and            %[flat1],  %[flat3],        %[flat1]     \n\t"
357233d2500723e5594f3e7c70896ffeeef32b9c950ywan
      /* outputs are early-clobber ("=&r"): they are written while input
       * registers are still needed by later instructions */
358233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r),
359233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1), [flat3] "=&r" (flat3)
360233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2),
361233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1),
362233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [q2] "r" (q2), [q3] "r" (q3), [q4] "r" (q4),
363233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [flat_thresh] "r" (flat_thresh), [ones] "r" (ones)
364233d2500723e5594f3e7c70896ffeeef32b9c950ywan  );
365233d2500723e5594f3e7c70896ffeeef32b9c950ywan
366233d2500723e5594f3e7c70896ffeeef32b9c950ywan  *flat2 = flat1;
367233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
368233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif  // #if HAVE_DSPR2
369233d2500723e5594f3e7c70896ffeeef32b9c950ywan#ifdef __cplusplus
370233d2500723e5594f3e7c70896ffeeef32b9c950ywan}  // extern "C"
371233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif
372233d2500723e5594f3e7c70896ffeeef32b9c950ywan
373233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif  // VP9_COMMON_MIPS_DSPR2_VP9_LOOPFILTER_MASKS_DSPR2_H_
374