1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/*
2233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3233d2500723e5594f3e7c70896ffeeef32b9c950ywan *
4233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  Use of this source code is governed by a BSD-style license
5233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  that can be found in the LICENSE file in the root of the source
6233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  tree. An additional intellectual property rights grant can be found
7233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  in the file PATENTS.  All contributing project authors may
8233d2500723e5594f3e7c70896ffeeef32b9c950ywan *  be found in the AUTHORS file in the root of the source tree.
9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include <stdlib.h>
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vp9_rtcd.h"
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_common.h"
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_loopfilter.h"
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_onyxc_int.h"
17233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
18233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
19233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
20233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
21233d2500723e5594f3e7c70896ffeeef32b9c950ywan
22233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if HAVE_DSPR2
23233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_lpf_horizontal_16_dspr2(unsigned char *s,
24233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 int pitch,
25233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 const uint8_t *blimit,
26233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 const uint8_t *limit,
27233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 const uint8_t *thresh,
28233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                 int count) {
29233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  mask;
30233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  hev, flat, flat2;
31233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t   i;
32233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t   *sp7, *sp6, *sp5, *sp4, *sp3, *sp2, *sp1, *sp0;
33233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint8_t   *sq0, *sq1, *sq2, *sq3, *sq4, *sq5, *sq6, *sq7;
34233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  thresh_vec, flimit_vec, limit_vec;
35233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  uflimit, ulimit, uthresh;
36233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
37233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  p1_f0, p0_f0, q0_f0, q1_f0;
38233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l;
39233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l;
40233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r;
41233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r;
42233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1;
43233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uint32_t  q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1;
44233d2500723e5594f3e7c70896ffeeef32b9c950ywan
45233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uflimit = *blimit;
46233d2500723e5594f3e7c70896ffeeef32b9c950ywan  ulimit  = *limit;
47233d2500723e5594f3e7c70896ffeeef32b9c950ywan  uthresh = *thresh;
48233d2500723e5594f3e7c70896ffeeef32b9c950ywan
49233d2500723e5594f3e7c70896ffeeef32b9c950ywan  /* create quad-byte */
50233d2500723e5594f3e7c70896ffeeef32b9c950ywan  __asm__ __volatile__ (
51233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "replv.qb       %[thresh_vec],    %[uthresh]      \n\t"
52233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "replv.qb       %[flimit_vec],    %[uflimit]      \n\t"
53233d2500723e5594f3e7c70896ffeeef32b9c950ywan      "replv.qb       %[limit_vec],     %[ulimit]       \n\t"
54233d2500723e5594f3e7c70896ffeeef32b9c950ywan
55233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec),
56233d2500723e5594f3e7c70896ffeeef32b9c950ywan        [limit_vec] "=r" (limit_vec)
57233d2500723e5594f3e7c70896ffeeef32b9c950ywan      : [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
58233d2500723e5594f3e7c70896ffeeef32b9c950ywan  );
59233d2500723e5594f3e7c70896ffeeef32b9c950ywan
60233d2500723e5594f3e7c70896ffeeef32b9c950ywan  /* prefetch data for store */
61233d2500723e5594f3e7c70896ffeeef32b9c950ywan  vp9_prefetch_store(s);
62233d2500723e5594f3e7c70896ffeeef32b9c950ywan
63233d2500723e5594f3e7c70896ffeeef32b9c950ywan  for (i = 0; i < (2 * count); i++) {
64233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sp7 = s - (pitch << 3);
65233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sp6 = sp7 + pitch;
66233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sp5 = sp6 + pitch;
67233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sp4 = sp5 + pitch;
68233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sp3 = sp4 + pitch;
69233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sp2 = sp3 + pitch;
70233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sp1 = sp2 + pitch;
71233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sp0 = sp1 + pitch;
72233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sq0 = s;
73233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sq1 = s + pitch;
74233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sq2 = sq1 + pitch;
75233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sq3 = sq2 + pitch;
76233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sq4 = sq3 + pitch;
77233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sq5 = sq4 + pitch;
78233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sq6 = sq5 + pitch;
79233d2500723e5594f3e7c70896ffeeef32b9c950ywan    sq7 = sq6 + pitch;
80233d2500723e5594f3e7c70896ffeeef32b9c950ywan
81233d2500723e5594f3e7c70896ffeeef32b9c950ywan    __asm__ __volatile__ (
82233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[p7],      (%[sp7])            \n\t"
83233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[p6],      (%[sp6])            \n\t"
84233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[p5],      (%[sp5])            \n\t"
85233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[p4],      (%[sp4])            \n\t"
86233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[p3],      (%[sp3])            \n\t"
87233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[p2],      (%[sp2])            \n\t"
88233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[p1],      (%[sp1])            \n\t"
89233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[p0],      (%[sp0])            \n\t"
90233d2500723e5594f3e7c70896ffeeef32b9c950ywan
91233d2500723e5594f3e7c70896ffeeef32b9c950ywan        : [p3] "=&r" (p3), [p2] "=&r" (p2), [p1] "=&r" (p1), [p0] "=&r" (p0),
92233d2500723e5594f3e7c70896ffeeef32b9c950ywan          [p7] "=&r" (p7), [p6] "=&r" (p6), [p5] "=&r" (p5), [p4] "=&r" (p4)
93233d2500723e5594f3e7c70896ffeeef32b9c950ywan        : [sp3] "r" (sp3), [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
94233d2500723e5594f3e7c70896ffeeef32b9c950ywan          [sp4] "r" (sp4), [sp5] "r" (sp5), [sp6] "r" (sp6), [sp7] "r" (sp7)
95233d2500723e5594f3e7c70896ffeeef32b9c950ywan    );
96233d2500723e5594f3e7c70896ffeeef32b9c950ywan
97233d2500723e5594f3e7c70896ffeeef32b9c950ywan    __asm__ __volatile__ (
98233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[q0],      (%[sq0])            \n\t"
99233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[q1],      (%[sq1])            \n\t"
100233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[q2],      (%[sq2])            \n\t"
101233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[q3],      (%[sq3])            \n\t"
102233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[q4],      (%[sq4])            \n\t"
103233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[q5],      (%[sq5])            \n\t"
104233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[q6],      (%[sq6])            \n\t"
105233d2500723e5594f3e7c70896ffeeef32b9c950ywan        "lw     %[q7],      (%[sq7])            \n\t"
106233d2500723e5594f3e7c70896ffeeef32b9c950ywan
107233d2500723e5594f3e7c70896ffeeef32b9c950ywan        : [q3] "=&r" (q3), [q2] "=&r" (q2), [q1] "=&r" (q1), [q0] "=&r" (q0),
108233d2500723e5594f3e7c70896ffeeef32b9c950ywan          [q7] "=&r" (q7), [q6] "=&r" (q6), [q5] "=&r" (q5), [q4] "=&r" (q4)
109233d2500723e5594f3e7c70896ffeeef32b9c950ywan        : [sq3] "r" (sq3), [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0),
110233d2500723e5594f3e7c70896ffeeef32b9c950ywan          [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6), [sq7] "r" (sq7)
111233d2500723e5594f3e7c70896ffeeef32b9c950ywan    );
112233d2500723e5594f3e7c70896ffeeef32b9c950ywan
113233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
114233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                        p1, p0, p3, p2, q0, q1, q2, q3,
115233d2500723e5594f3e7c70896ffeeef32b9c950ywan                                        &hev, &mask, &flat);
116233d2500723e5594f3e7c70896ffeeef32b9c950ywan
117233d2500723e5594f3e7c70896ffeeef32b9c950ywan    vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
118233d2500723e5594f3e7c70896ffeeef32b9c950ywan
119233d2500723e5594f3e7c70896ffeeef32b9c950ywan    /* f0 */
120233d2500723e5594f3e7c70896ffeeef32b9c950ywan    if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
121233d2500723e5594f3e7c70896ffeeef32b9c950ywan        ((flat2 != 0) && (flat == 0) && (mask != 0))) {
122233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
123233d2500723e5594f3e7c70896ffeeef32b9c950ywan                        &p1_f0, &p0_f0, &q0_f0, &q1_f0);
124233d2500723e5594f3e7c70896ffeeef32b9c950ywan
125233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
126233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw       %[p1_f0],   (%[sp1])            \n\t"
127233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw       %[p0_f0],   (%[sp0])            \n\t"
128233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw       %[q0_f0],   (%[sq0])            \n\t"
129233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw       %[q1_f0],   (%[sq1])            \n\t"
130233d2500723e5594f3e7c70896ffeeef32b9c950ywan
131233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
132233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
133233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
134233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [sp1] "r" (sp1), [sp0] "r" (sp0),
135233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [sq0] "r" (sq0), [sq1] "r" (sq1)
136233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
137233d2500723e5594f3e7c70896ffeeef32b9c950ywan    } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) &&
138233d2500723e5594f3e7c70896ffeeef32b9c950ywan               (mask == 0xFFFFFFFF)) {
139233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* f2 */
140233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_LEFT_0TO3()
141233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_LEFT_4TO7()
142233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
143233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &p3_l, &p2_l, &p1_l, &p0_l,
144233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &q0_l, &q1_l, &q2_l, &q3_l,
145233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &q4_l, &q5_l, &q6_l, &q7_l);
146233d2500723e5594f3e7c70896ffeeef32b9c950ywan
147233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_RIGHT_0TO3()
148233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_RIGHT_4TO7()
149233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
150233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &p3_r, &p2_r, &p1_r, &p0_r,
151233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &q0_r, &q1_r, &q2_r, &q3_r,
152233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &q4_r, &q5_r, &q6_r, &q7_r);
153233d2500723e5594f3e7c70896ffeeef32b9c950ywan
154233d2500723e5594f3e7c70896ffeeef32b9c950ywan      COMBINE_LEFT_RIGHT_0TO2()
155233d2500723e5594f3e7c70896ffeeef32b9c950ywan      COMBINE_LEFT_RIGHT_3TO6()
156233d2500723e5594f3e7c70896ffeeef32b9c950ywan
157233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
158233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p6], (%[sp6])    \n\t"
159233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p5], (%[sp5])    \n\t"
160233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p4], (%[sp4])    \n\t"
161233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p3], (%[sp3])    \n\t"
162233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p2], (%[sp2])    \n\t"
163233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p1], (%[sp1])    \n\t"
164233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p0], (%[sp0])    \n\t"
165233d2500723e5594f3e7c70896ffeeef32b9c950ywan
166233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
167233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p6] "r" (p6), [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3),
168233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0),
169233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [sp6] "r" (sp6), [sp5] "r" (sp5), [sp4] "r" (sp4), [sp3] "r" (sp3),
170233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0)
171233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
172233d2500723e5594f3e7c70896ffeeef32b9c950ywan
173233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
174233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q6], (%[sq6])    \n\t"
175233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q5], (%[sq5])    \n\t"
176233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q4], (%[sq4])    \n\t"
177233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q3], (%[sq3])    \n\t"
178233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q2], (%[sq2])    \n\t"
179233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q1], (%[sq1])    \n\t"
180233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q0], (%[sq0])    \n\t"
181233d2500723e5594f3e7c70896ffeeef32b9c950ywan
182233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
183233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [q6] "r" (q6), [q5] "r" (q5), [q4] "r" (q4), [q3] "r" (q3),
184233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q2] "r" (q2), [q1] "r" (q1), [q0] "r" (q0),
185233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [sq6] "r" (sq6), [sq5] "r" (sq5), [sq4] "r" (sq4), [sq3] "r" (sq3),
186233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0)
187233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
188233d2500723e5594f3e7c70896ffeeef32b9c950ywan    } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
189233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* f1 */
190233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* left 2 element operation */
191233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_LEFT_0TO3()
192233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
193233d2500723e5594f3e7c70896ffeeef32b9c950ywan                         &q0_l, &q1_l, &q2_l, &q3_l);
194233d2500723e5594f3e7c70896ffeeef32b9c950ywan
195233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* right 2 element operation */
196233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_RIGHT_0TO3()
197233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
198233d2500723e5594f3e7c70896ffeeef32b9c950ywan                         &q0_r, &q1_r, &q2_r, &q3_r);
199233d2500723e5594f3e7c70896ffeeef32b9c950ywan
200233d2500723e5594f3e7c70896ffeeef32b9c950ywan      COMBINE_LEFT_RIGHT_0TO2()
201233d2500723e5594f3e7c70896ffeeef32b9c950ywan
202233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
203233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p2], (%[sp2])    \n\t"
204233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p1], (%[sp1])    \n\t"
205233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[p0], (%[sp0])    \n\t"
206233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q0], (%[sq0])    \n\t"
207233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q1], (%[sq1])    \n\t"
208233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "sw         %[q2], (%[sq2])    \n\t"
209233d2500723e5594f3e7c70896ffeeef32b9c950ywan
210233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
211233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0),
212233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0] "r" (q0), [q1] "r" (q1), [q2] "r" (q2),
213233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
214233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
215233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
216233d2500723e5594f3e7c70896ffeeef32b9c950ywan    } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
217233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* f0+f1 */
218233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
219233d2500723e5594f3e7c70896ffeeef32b9c950ywan                        &p1_f0, &p0_f0, &q0_f0, &q1_f0);
220233d2500723e5594f3e7c70896ffeeef32b9c950ywan
221233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* left 2 element operation */
222233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_LEFT_0TO3()
223233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
224233d2500723e5594f3e7c70896ffeeef32b9c950ywan                         &q0_l, &q1_l, &q2_l, &q3_l);
225233d2500723e5594f3e7c70896ffeeef32b9c950ywan
226233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* right 2 element operation */
227233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_RIGHT_0TO3()
228233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
229233d2500723e5594f3e7c70896ffeeef32b9c950ywan                         &q0_r, &q1_r, &q2_r, &q3_r);
230233d2500723e5594f3e7c70896ffeeef32b9c950ywan
231233d2500723e5594f3e7c70896ffeeef32b9c950ywan      if (mask & flat & 0x000000FF) {
232233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
233233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_r],  (%[sp2])    \n\t"
234233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_r],  (%[sp1])    \n\t"
235233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_r],  (%[sp0])    \n\t"
236233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_r],  (%[sq0])    \n\t"
237233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_r],  (%[sq1])    \n\t"
238233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_r],  (%[sq2])    \n\t"
239233d2500723e5594f3e7c70896ffeeef32b9c950ywan
240233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
241233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p2_r] "r" (p2_r), [p1_r] "r" (p1_r), [p0_r] "r" (p0_r),
242233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r),
243233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
244233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
245233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
246233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & 0x000000FF) {
247233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
248233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_f0],  (%[sp1])    \n\t"
249233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_f0],  (%[sp0])    \n\t"
250233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_f0],  (%[sq0])    \n\t"
251233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_f0],  (%[sq1])    \n\t"
252233d2500723e5594f3e7c70896ffeeef32b9c950ywan
253233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
254233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
255233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
256233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp1] "r" (sp1), [sp0] "r" (sp0),
257233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1)
258233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
259233d2500723e5594f3e7c70896ffeeef32b9c950ywan      }
260233d2500723e5594f3e7c70896ffeeef32b9c950ywan
261233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
262233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p2_r],    %[p2_r],    16      \n\t"
263233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p1_r],    %[p1_r],    16      \n\t"
264233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p0_r],    %[p0_r],    16      \n\t"
265233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q0_r],    %[q0_r],    16      \n\t"
266233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q1_r],    %[q1_r],    16      \n\t"
267233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q2_r],    %[q2_r],    16      \n\t"
268233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p1_f0],   %[p1_f0],   8       \n\t"
269233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p0_f0],   %[p0_f0],   8       \n\t"
270233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q0_f0],   %[q0_f0],   8       \n\t"
271233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q1_f0],   %[q1_f0],   8       \n\t"
272233d2500723e5594f3e7c70896ffeeef32b9c950ywan
273233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p2_r] "+r" (p2_r), [p1_r] "+r" (p1_r), [p0_r] "+r" (p0_r),
274233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_r] "+r" (q0_r), [q1_r] "+r" (q1_r), [q2_r] "+r" (q2_r),
275233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
276233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
277233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
278233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
279233d2500723e5594f3e7c70896ffeeef32b9c950ywan
280233d2500723e5594f3e7c70896ffeeef32b9c950ywan      if (mask & flat & 0x0000FF00) {
281233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
282233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_r],  +1(%[sp2])    \n\t"
283233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_r],  +1(%[sp1])    \n\t"
284233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_r],  +1(%[sp0])    \n\t"
285233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_r],  +1(%[sq0])    \n\t"
286233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_r],  +1(%[sq1])    \n\t"
287233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_r],  +1(%[sq2])    \n\t"
288233d2500723e5594f3e7c70896ffeeef32b9c950ywan
289233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
290233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p2_r] "r" (p2_r), [p1_r] "r" (p1_r), [p0_r] "r" (p0_r),
291233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r),
292233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
293233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
294233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
295233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & 0x0000FF00) {
296233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
297233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_f0],  +1(%[sp1])    \n\t"
298233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_f0],  +1(%[sp0])    \n\t"
299233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_f0],  +1(%[sq0])    \n\t"
300233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_f0],  +1(%[sq1])    \n\t"
301233d2500723e5594f3e7c70896ffeeef32b9c950ywan
302233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
303233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
304233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
305233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp1] "r" (sp1), [sp0] "r" (sp0),
306233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1)
307233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
308233d2500723e5594f3e7c70896ffeeef32b9c950ywan      }
309233d2500723e5594f3e7c70896ffeeef32b9c950ywan
310233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
311233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p1_f0],   %[p1_f0],   8     \n\t"
312233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p0_f0],   %[p0_f0],   8     \n\t"
313233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q0_f0],   %[q0_f0],   8     \n\t"
314233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q1_f0],   %[q1_f0],   8     \n\t"
315233d2500723e5594f3e7c70896ffeeef32b9c950ywan
316233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
317233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
318233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
319233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
320233d2500723e5594f3e7c70896ffeeef32b9c950ywan
321233d2500723e5594f3e7c70896ffeeef32b9c950ywan      if (mask & flat & 0x00FF0000) {
322233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
323233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_l],  +2(%[sp2])    \n\t"
324233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_l],  +2(%[sp1])    \n\t"
325233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_l],  +2(%[sp0])    \n\t"
326233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_l],  +2(%[sq0])    \n\t"
327233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_l],  +2(%[sq1])    \n\t"
328233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_l],  +2(%[sq2])    \n\t"
329233d2500723e5594f3e7c70896ffeeef32b9c950ywan
330233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
331233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p2_l] "r" (p2_l), [p1_l] "r" (p1_l), [p0_l] "r" (p0_l),
332233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l),
333233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
334233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
335233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
336233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & 0x00FF0000) {
337233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
338233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_f0],  +2(%[sp1])    \n\t"
339233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_f0],  +2(%[sp0])    \n\t"
340233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_f0],  +2(%[sq0])    \n\t"
341233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_f0],  +2(%[sq1])    \n\t"
342233d2500723e5594f3e7c70896ffeeef32b9c950ywan
343233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
344233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
345233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
346233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp1] "r" (sp1), [sp0] "r" (sp0),
347233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1)
348233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
349233d2500723e5594f3e7c70896ffeeef32b9c950ywan      }
350233d2500723e5594f3e7c70896ffeeef32b9c950ywan
351233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
352233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p2_l],    %[p2_l],    16      \n\t"
353233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p1_l],    %[p1_l],    16      \n\t"
354233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p0_l],    %[p0_l],    16      \n\t"
355233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q0_l],    %[q0_l],    16      \n\t"
356233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q1_l],    %[q1_l],    16      \n\t"
357233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q2_l],    %[q2_l],    16      \n\t"
358233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p1_f0],   %[p1_f0],   8       \n\t"
359233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p0_f0],   %[p0_f0],   8       \n\t"
360233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q0_f0],   %[q0_f0],   8       \n\t"
361233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q1_f0],   %[q1_f0],   8       \n\t"
362233d2500723e5594f3e7c70896ffeeef32b9c950ywan
363233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p2_l] "+r" (p2_l), [p1_l] "+r" (p1_l), [p0_l] "+r" (p0_l),
364233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_l] "+r" (q0_l), [q1_l] "+r" (q1_l), [q2_l] "+r" (q2_l),
365233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
366233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
367233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
368233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
369233d2500723e5594f3e7c70896ffeeef32b9c950ywan
370233d2500723e5594f3e7c70896ffeeef32b9c950ywan      if (mask & flat & 0xFF000000) {
371233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
372233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_l],  +3(%[sp2])    \n\t"
373233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_l],  +3(%[sp1])    \n\t"
374233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_l],  +3(%[sp0])    \n\t"
375233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_l],  +3(%[sq0])    \n\t"
376233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_l],  +3(%[sq1])    \n\t"
377233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_l],  +3(%[sq2])    \n\t"
378233d2500723e5594f3e7c70896ffeeef32b9c950ywan
379233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
380233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p2_l] "r" (p2_l), [p1_l] "r" (p1_l), [p0_l] "r" (p0_l),
381233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l),
382233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
383233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
384233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
385233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & 0xFF000000) {
386233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
387233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_f0],  +3(%[sp1])    \n\t"
388233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_f0],  +3(%[sp0])    \n\t"
389233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_f0],  +3(%[sq0])    \n\t"
390233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_f0],  +3(%[sq1])    \n\t"
391233d2500723e5594f3e7c70896ffeeef32b9c950ywan
392233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
393233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
394233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
395233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp1] "r" (sp1), [sp0] "r" (sp0),
396233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1)
397233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
398233d2500723e5594f3e7c70896ffeeef32b9c950ywan      }
399233d2500723e5594f3e7c70896ffeeef32b9c950ywan    } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
400233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* f0 + f1 + f2 */
401233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* f0  function */
402233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
403233d2500723e5594f3e7c70896ffeeef32b9c950ywan                        &p1_f0, &p0_f0, &q0_f0, &q1_f0);
404233d2500723e5594f3e7c70896ffeeef32b9c950ywan
405233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* f1  function */
406233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* left 2 element operation */
407233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_LEFT_0TO3()
408233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
409233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          q0_l, q1_l, q2_l, q3_l,
410233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          &p2_l_f1, &p1_l_f1, &p0_l_f1,
411233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          &q0_l_f1, &q1_l_f1, &q2_l_f1);
412233d2500723e5594f3e7c70896ffeeef32b9c950ywan
413233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* right 2 element operation */
414233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_RIGHT_0TO3()
415233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
416233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          q0_r, q1_r, q2_r, q3_r,
417233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          &p2_r_f1, &p1_r_f1, &p0_r_f1,
418233d2500723e5594f3e7c70896ffeeef32b9c950ywan                          &q0_r_f1, &q1_r_f1, &q2_r_f1);
419233d2500723e5594f3e7c70896ffeeef32b9c950ywan
420233d2500723e5594f3e7c70896ffeeef32b9c950ywan      /* f2  function */
421233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_LEFT_4TO7()
422233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
423233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &p3_l, &p2_l, &p1_l, &p0_l,
424233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &q0_l, &q1_l, &q2_l, &q3_l,
425233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &q4_l, &q5_l, &q6_l, &q7_l);
426233d2500723e5594f3e7c70896ffeeef32b9c950ywan
427233d2500723e5594f3e7c70896ffeeef32b9c950ywan      PACK_RIGHT_4TO7()
428233d2500723e5594f3e7c70896ffeeef32b9c950ywan      vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
429233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &p3_r, &p2_r, &p1_r, &p0_r,
430233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &q0_r, &q1_r, &q2_r, &q3_r,
431233d2500723e5594f3e7c70896ffeeef32b9c950ywan                              &q4_r, &q5_r, &q6_r, &q7_r);
432233d2500723e5594f3e7c70896ffeeef32b9c950ywan
433233d2500723e5594f3e7c70896ffeeef32b9c950ywan      if (mask & flat & flat2 & 0x000000FF) {
434233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
435233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p6_r],  (%[sp6])    \n\t"
436233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p5_r],  (%[sp5])    \n\t"
437233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p4_r],  (%[sp4])    \n\t"
438233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p3_r],  (%[sp3])    \n\t"
439233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_r],  (%[sp2])    \n\t"
440233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_r],  (%[sp1])    \n\t"
441233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_r],  (%[sp0])    \n\t"
442233d2500723e5594f3e7c70896ffeeef32b9c950ywan
443233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
444233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p6_r] "r" (p6_r), [p5_r] "r" (p5_r), [p4_r] "r" (p4_r),
445233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p3_r] "r" (p3_r), [p2_r] "r" (p2_r), [p1_r] "r" (p1_r),
446233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp6] "r" (sp6), [sp5] "r" (sp5), [sp4] "r" (sp4),
447233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp3] "r" (sp3), [sp2] "r" (sp2), [sp1] "r" (sp1),
448233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p0_r] "r" (p0_r), [sp0] "r" (sp0)
449233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
450233d2500723e5594f3e7c70896ffeeef32b9c950ywan
451233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
452233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_r],  (%[sq0])    \n\t"
453233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_r],  (%[sq1])    \n\t"
454233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_r],  (%[sq2])    \n\t"
455233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q3_r],  (%[sq3])    \n\t"
456233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q4_r],  (%[sq4])    \n\t"
457233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q5_r],  (%[sq5])    \n\t"
458233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q6_r],  (%[sq6])    \n\t"
459233d2500723e5594f3e7c70896ffeeef32b9c950ywan
460233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
461233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r),
462233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q3_r] "r" (q3_r), [q4_r] "r" (q4_r), [q5_r] "r" (q5_r),
463233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q6_r] "r" (q6_r),
464233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2),
465233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq3] "r" (sq3), [sq4] "r" (sq4), [sq5] "r" (sq5),
466233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq6] "r" (sq6)
467233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
468233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & flat & 0x000000FF) {
469233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
470233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_r_f1],  (%[sp2])    \n\t"
471233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_r_f1],  (%[sp1])    \n\t"
472233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_r_f1],  (%[sp0])    \n\t"
473233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_r_f1],  (%[sq0])    \n\t"
474233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_r_f1],  (%[sq1])    \n\t"
475233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_r_f1],  (%[sq2])    \n\t"
476233d2500723e5594f3e7c70896ffeeef32b9c950ywan
477233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
478233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p2_r_f1] "r" (p2_r_f1), [p1_r_f1] "r" (p1_r_f1),
479233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p0_r_f1] "r" (p0_r_f1), [q0_r_f1] "r" (q0_r_f1),
480233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q1_r_f1] "r" (q1_r_f1), [q2_r_f1] "r" (q2_r_f1),
481233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
482233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
483233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
484233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & 0x000000FF) {
485233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
486233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_f0],  (%[sp1])    \n\t"
487233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_f0],  (%[sp0])    \n\t"
488233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_f0],  (%[sq0])    \n\t"
489233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_f0],  (%[sq1])    \n\t"
490233d2500723e5594f3e7c70896ffeeef32b9c950ywan
491233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
492233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0),
493233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0),
494233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1)
495233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
496233d2500723e5594f3e7c70896ffeeef32b9c950ywan      }
497233d2500723e5594f3e7c70896ffeeef32b9c950ywan
498233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
499233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p6_r], %[p6_r], 16     \n\t"
500233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p5_r], %[p5_r], 16     \n\t"
501233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p4_r], %[p4_r], 16     \n\t"
502233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p3_r], %[p3_r], 16     \n\t"
503233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p2_r], %[p2_r], 16     \n\t"
504233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p1_r], %[p1_r], 16     \n\t"
505233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p0_r], %[p0_r], 16     \n\t"
506233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q0_r], %[q0_r], 16     \n\t"
507233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q1_r], %[q1_r], 16     \n\t"
508233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q2_r], %[q2_r], 16     \n\t"
509233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q3_r], %[q3_r], 16     \n\t"
510233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q4_r], %[q4_r], 16     \n\t"
511233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q5_r], %[q5_r], 16     \n\t"
512233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q6_r], %[q6_r], 16     \n\t"
513233d2500723e5594f3e7c70896ffeeef32b9c950ywan
514233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [q0_r] "+r" (q0_r), [q1_r] "+r" (q1_r), [q2_r] "+r" (q2_r),
515233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q3_r] "+r" (q3_r), [q4_r] "+r" (q4_r), [q5_r] "+r" (q5_r),
516233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p6_r] "+r" (p6_r), [p5_r] "+r" (p5_r), [p4_r] "+r" (p4_r),
517233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p3_r] "+r" (p3_r), [p2_r] "+r" (p2_r), [p1_r] "+r" (p1_r),
518233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q6_r] "+r" (q6_r), [p0_r] "+r" (p0_r)
519233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
520233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
521233d2500723e5594f3e7c70896ffeeef32b9c950ywan
522233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
523233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p2_r_f1], %[p2_r_f1], 16     \n\t"
524233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p1_r_f1], %[p1_r_f1], 16     \n\t"
525233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p0_r_f1], %[p0_r_f1], 16     \n\t"
526233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q0_r_f1], %[q0_r_f1], 16     \n\t"
527233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q1_r_f1], %[q1_r_f1], 16     \n\t"
528233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q2_r_f1], %[q2_r_f1], 16     \n\t"
529233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p1_f0],   %[p1_f0],   8      \n\t"
530233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p0_f0],   %[p0_f0],   8      \n\t"
531233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q0_f0],   %[q0_f0],   8      \n\t"
532233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q1_f0],   %[q1_f0],   8      \n\t"
533233d2500723e5594f3e7c70896ffeeef32b9c950ywan
534233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p2_r_f1] "+r" (p2_r_f1), [p1_r_f1] "+r" (p1_r_f1),
535233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p0_r_f1] "+r" (p0_r_f1), [q0_r_f1] "+r" (q0_r_f1),
536233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q1_r_f1] "+r" (q1_r_f1), [q2_r_f1] "+r" (q2_r_f1),
537233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
538233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
539233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
540233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
541233d2500723e5594f3e7c70896ffeeef32b9c950ywan
542233d2500723e5594f3e7c70896ffeeef32b9c950ywan      if (mask & flat & flat2 & 0x0000FF00) {
543233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
544233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p6_r],  +1(%[sp6])    \n\t"
545233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p5_r],  +1(%[sp5])    \n\t"
546233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p4_r],  +1(%[sp4])    \n\t"
547233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p3_r],  +1(%[sp3])    \n\t"
548233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_r],  +1(%[sp2])    \n\t"
549233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_r],  +1(%[sp1])    \n\t"
550233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_r],  +1(%[sp0])    \n\t"
551233d2500723e5594f3e7c70896ffeeef32b9c950ywan
552233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
553233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p6_r] "r" (p6_r), [p5_r] "r" (p5_r), [p4_r] "r" (p4_r),
554233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p3_r] "r" (p3_r), [p2_r] "r" (p2_r), [p1_r] "r" (p1_r),
555233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p0_r] "r" (p0_r), [sp6] "r" (sp6), [sp5] "r" (sp5),
556233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp4] "r" (sp4), [sp3] "r" (sp3),
557233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0)
558233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
559233d2500723e5594f3e7c70896ffeeef32b9c950ywan
560233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
561233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_r],  +1(%[sq0])    \n\t"
562233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_r],  +1(%[sq1])    \n\t"
563233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_r],  +1(%[sq2])    \n\t"
564233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q3_r],  +1(%[sq3])    \n\t"
565233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q4_r],  +1(%[sq4])    \n\t"
566233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q5_r],  +1(%[sq5])    \n\t"
567233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q6_r],  +1(%[sq6])    \n\t"
568233d2500723e5594f3e7c70896ffeeef32b9c950ywan
569233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
570233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r),
571233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q3_r] "r" (q3_r), [q4_r] "r" (q4_r), [q5_r] "r" (q5_r),
572233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q6_r] "r" (q6_r), [sq0] "r" (sq0), [sq1] "r" (sq1),
573233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq2] "r" (sq2), [sq3] "r" (sq3),
574233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6)
575233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
576233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & flat & 0x0000FF00) {
577233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
578233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_r_f1],  +1(%[sp2])    \n\t"
579233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_r_f1],  +1(%[sp1])    \n\t"
580233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_r_f1],  +1(%[sp0])    \n\t"
581233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_r_f1],  +1(%[sq0])    \n\t"
582233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_r_f1],  +1(%[sq1])    \n\t"
583233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_r_f1],  +1(%[sq2])    \n\t"
584233d2500723e5594f3e7c70896ffeeef32b9c950ywan
585233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
586233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p2_r_f1] "r" (p2_r_f1), [p1_r_f1] "r" (p1_r_f1),
587233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p0_r_f1] "r" (p0_r_f1), [q0_r_f1] "r" (q0_r_f1),
588233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q1_r_f1] "r" (q1_r_f1), [q2_r_f1] "r" (q2_r_f1),
589233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
590233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
591233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
592233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & 0x0000FF00) {
593233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
594233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_f0],  +1(%[sp1])    \n\t"
595233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_f0],  +1(%[sp0])    \n\t"
596233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_f0],  +1(%[sq0])    \n\t"
597233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_f0],  +1(%[sq1])    \n\t"
598233d2500723e5594f3e7c70896ffeeef32b9c950ywan
599233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
600233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0),
601233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0),
602233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1)
603233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
604233d2500723e5594f3e7c70896ffeeef32b9c950ywan      }
605233d2500723e5594f3e7c70896ffeeef32b9c950ywan
606233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
607233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p1_f0], %[p1_f0], 8     \n\t"
608233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[p0_f0], %[p0_f0], 8     \n\t"
609233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q0_f0], %[q0_f0], 8     \n\t"
610233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl        %[q1_f0], %[q1_f0], 8     \n\t"
611233d2500723e5594f3e7c70896ffeeef32b9c950ywan
612233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
613233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
614233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
615233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
616233d2500723e5594f3e7c70896ffeeef32b9c950ywan
617233d2500723e5594f3e7c70896ffeeef32b9c950ywan      if (mask & flat & flat2 & 0x00FF0000) {
618233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
619233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p6_l],  +2(%[sp6])    \n\t"
620233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p5_l],  +2(%[sp5])    \n\t"
621233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p4_l],  +2(%[sp4])    \n\t"
622233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p3_l],  +2(%[sp3])    \n\t"
623233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_l],  +2(%[sp2])    \n\t"
624233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_l],  +2(%[sp1])    \n\t"
625233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_l],  +2(%[sp0])    \n\t"
626233d2500723e5594f3e7c70896ffeeef32b9c950ywan
627233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
628233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p6_l] "r" (p6_l), [p5_l] "r" (p5_l), [p4_l] "r" (p4_l),
629233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p3_l] "r" (p3_l), [p2_l] "r" (p2_l), [p1_l] "r" (p1_l),
630233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p0_l] "r" (p0_l), [sp6] "r" (sp6), [sp5] "r" (sp5),
631233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp4] "r" (sp4), [sp3] "r" (sp3),
632233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0)
633233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
634233d2500723e5594f3e7c70896ffeeef32b9c950ywan
635233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
636233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_l],  +2(%[sq0])    \n\t"
637233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_l],  +2(%[sq1])    \n\t"
638233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_l],  +2(%[sq2])    \n\t"
639233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q3_l],  +2(%[sq3])    \n\t"
640233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q4_l],  +2(%[sq4])    \n\t"
641233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q5_l],  +2(%[sq5])    \n\t"
642233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q6_l],  +2(%[sq6])    \n\t"
643233d2500723e5594f3e7c70896ffeeef32b9c950ywan
644233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
645233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l),
646233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q3_l] "r" (q3_l), [q4_l] "r" (q4_l), [q5_l] "r" (q5_l),
647233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q6_l] "r" (q6_l), [sq0] "r" (sq0), [sq1] "r" (sq1),
648233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq2] "r" (sq2), [sq3] "r" (sq3),
649233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6)
650233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
651233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & flat & 0x00FF0000) {
652233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
653233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p2_l_f1],  +2(%[sp2])    \n\t"
654233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_l_f1],  +2(%[sp1])    \n\t"
655233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_l_f1],  +2(%[sp0])    \n\t"
656233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_l_f1],  +2(%[sq0])    \n\t"
657233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_l_f1],  +2(%[sq1])    \n\t"
658233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q2_l_f1],  +2(%[sq2])    \n\t"
659233d2500723e5594f3e7c70896ffeeef32b9c950ywan
660233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
661233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p2_l_f1] "r" (p2_l_f1), [p1_l_f1] "r" (p1_l_f1),
662233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p0_l_f1] "r" (p0_l_f1), [q0_l_f1] "r" (q0_l_f1),
663233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q1_l_f1] "r" (q1_l_f1), [q2_l_f1] "r" (q2_l_f1),
664233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
665233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
666233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
667233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & 0x00FF0000) {
668233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
669233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p1_f0],  +2(%[sp1])    \n\t"
670233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[p0_f0],  +2(%[sp0])    \n\t"
671233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q0_f0],  +2(%[sq0])    \n\t"
672233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb         %[q1_f0],  +2(%[sq1])    \n\t"
673233d2500723e5594f3e7c70896ffeeef32b9c950ywan
674233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
675233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0),
676233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0),
677233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1)
678233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
679233d2500723e5594f3e7c70896ffeeef32b9c950ywan      }
680233d2500723e5594f3e7c70896ffeeef32b9c950ywan
681233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
682233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p6_l],    %[p6_l],    16   \n\t"
683233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p5_l],    %[p5_l],    16   \n\t"
684233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p4_l],    %[p4_l],    16   \n\t"
685233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p3_l],    %[p3_l],    16   \n\t"
686233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p2_l],    %[p2_l],    16   \n\t"
687233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p1_l],    %[p1_l],    16   \n\t"
688233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p0_l],    %[p0_l],    16   \n\t"
689233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q0_l],    %[q0_l],    16   \n\t"
690233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q1_l],    %[q1_l],    16   \n\t"
691233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q2_l],    %[q2_l],    16   \n\t"
692233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q3_l],    %[q3_l],    16   \n\t"
693233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q4_l],    %[q4_l],    16   \n\t"
694233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q5_l],    %[q5_l],    16   \n\t"
695233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q6_l],    %[q6_l],    16   \n\t"
696233d2500723e5594f3e7c70896ffeeef32b9c950ywan
697233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [q0_l] "+r" (q0_l), [q1_l] "+r" (q1_l), [q2_l] "+r" (q2_l),
698233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q3_l] "+r" (q3_l), [q4_l] "+r" (q4_l), [q5_l] "+r" (q5_l),
699233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q6_l] "+r" (q6_l), [p6_l] "+r" (p6_l), [p5_l] "+r" (p5_l),
700233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p4_l] "+r" (p4_l), [p3_l] "+r" (p3_l), [p2_l] "+r" (p2_l),
701233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p1_l] "+r" (p1_l), [p0_l] "+r" (p0_l)
702233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
703233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
704233d2500723e5594f3e7c70896ffeeef32b9c950ywan
705233d2500723e5594f3e7c70896ffeeef32b9c950ywan      __asm__ __volatile__ (
706233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p2_l_f1],   %[p2_l_f1],   16   \n\t"
707233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p1_l_f1],   %[p1_l_f1],   16   \n\t"
708233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p0_l_f1],   %[p0_l_f1],   16   \n\t"
709233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q0_l_f1],   %[q0_l_f1],   16   \n\t"
710233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q1_l_f1],   %[q1_l_f1],   16   \n\t"
711233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q2_l_f1],   %[q2_l_f1],   16   \n\t"
712233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p1_f0],     %[p1_f0],     8    \n\t"
713233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[p0_f0],     %[p0_f0],     8    \n\t"
714233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q0_f0],     %[q0_f0],     8    \n\t"
715233d2500723e5594f3e7c70896ffeeef32b9c950ywan          "srl      %[q1_f0],     %[q1_f0],     8    \n\t"
716233d2500723e5594f3e7c70896ffeeef32b9c950ywan
717233d2500723e5594f3e7c70896ffeeef32b9c950ywan          : [p2_l_f1] "+r" (p2_l_f1), [p1_l_f1] "+r" (p1_l_f1),
718233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p0_l_f1] "+r" (p0_l_f1), [q0_l_f1] "+r" (q0_l_f1),
719233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q1_l_f1] "+r" (q1_l_f1), [q2_l_f1] "+r" (q2_l_f1),
720233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
721233d2500723e5594f3e7c70896ffeeef32b9c950ywan            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
722233d2500723e5594f3e7c70896ffeeef32b9c950ywan          :
723233d2500723e5594f3e7c70896ffeeef32b9c950ywan      );
724233d2500723e5594f3e7c70896ffeeef32b9c950ywan
725233d2500723e5594f3e7c70896ffeeef32b9c950ywan      if (mask & flat & flat2 & 0xFF000000) {
726233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
727233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p6_l],    +3(%[sp6])    \n\t"
728233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p5_l],    +3(%[sp5])    \n\t"
729233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p4_l],    +3(%[sp4])    \n\t"
730233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p3_l],    +3(%[sp3])    \n\t"
731233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p2_l],    +3(%[sp2])    \n\t"
732233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p1_l],    +3(%[sp1])    \n\t"
733233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p0_l],    +3(%[sp0])    \n\t"
734233d2500723e5594f3e7c70896ffeeef32b9c950ywan
735233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
736233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p6_l] "r" (p6_l), [p5_l] "r" (p5_l), [p4_l] "r" (p4_l),
737233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p3_l] "r" (p3_l), [p2_l] "r" (p2_l), [p1_l] "r" (p1_l),
738233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p0_l] "r" (p0_l), [sp6] "r" (sp6), [sp5] "r" (sp5),
739233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp4] "r" (sp4), [sp3] "r" (sp3), [sp2] "r" (sp2),
740233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp1] "r" (sp1), [sp0] "r" (sp0)
741233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
742233d2500723e5594f3e7c70896ffeeef32b9c950ywan
743233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
744233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q0_l],    +3(%[sq0])    \n\t"
745233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q1_l],    +3(%[sq1])    \n\t"
746233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q2_l],    +3(%[sq2])    \n\t"
747233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q3_l],    +3(%[sq3])    \n\t"
748233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q4_l],    +3(%[sq4])    \n\t"
749233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q5_l],    +3(%[sq5])    \n\t"
750233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q6_l],    +3(%[sq6])    \n\t"
751233d2500723e5594f3e7c70896ffeeef32b9c950ywan
752233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
753233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [q0_l] "r" (q0_l), [q1_l] "r" (q1_l),
754233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q2_l] "r" (q2_l), [q3_l] "r" (q3_l),
755233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q4_l] "r" (q4_l), [q5_l] "r" (q5_l),
756233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2),
757233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq3] "r" (sq3), [sq4] "r" (sq4), [sq5] "r" (sq5),
758233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q6_l] "r" (q6_l), [sq6] "r" (sq6)
759233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
760233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & flat & 0xFF000000) {
761233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
762233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p2_l_f1],     +3(%[sp2])    \n\t"
763233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p1_l_f1],     +3(%[sp1])    \n\t"
764233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p0_l_f1],     +3(%[sp0])    \n\t"
765233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q0_l_f1],     +3(%[sq0])    \n\t"
766233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q1_l_f1],     +3(%[sq1])    \n\t"
767233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q2_l_f1],     +3(%[sq2])    \n\t"
768233d2500723e5594f3e7c70896ffeeef32b9c950ywan
769233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
770233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p2_l_f1] "r" (p2_l_f1), [p1_l_f1] "r" (p1_l_f1),
771233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [p0_l_f1] "r" (p0_l_f1), [q0_l_f1] "r" (q0_l_f1),
772233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q1_l_f1] "r" (q1_l_f1), [q2_l_f1] "r" (q2_l_f1),
773233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
774233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
775233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
776233d2500723e5594f3e7c70896ffeeef32b9c950ywan      } else if (mask & 0xFF000000) {
777233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
778233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p1_f0],   +3(%[sp1])    \n\t"
779233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[p0_f0],   +3(%[sp0])    \n\t"
780233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q0_f0],   +3(%[sq0])    \n\t"
781233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sb     %[q1_f0],   +3(%[sq1])    \n\t"
782233d2500723e5594f3e7c70896ffeeef32b9c950ywan
783233d2500723e5594f3e7c70896ffeeef32b9c950ywan            :
784233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
785233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
786233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sp1] "r" (sp1), [sp0] "r" (sp0),
787233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [sq0] "r" (sq0), [sq1] "r" (sq1)
788233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
789233d2500723e5594f3e7c70896ffeeef32b9c950ywan      }
790233d2500723e5594f3e7c70896ffeeef32b9c950ywan    }
791233d2500723e5594f3e7c70896ffeeef32b9c950ywan
792233d2500723e5594f3e7c70896ffeeef32b9c950ywan    s = s + 4;
793233d2500723e5594f3e7c70896ffeeef32b9c950ywan  }
794233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
795233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif  // #if HAVE_DSPR2
796