/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
109b35249446b07f40ac5fcc3205f2c048616efacchkuang
119b35249446b07f40ac5fcc3205f2c048616efacchkuang#include <stdlib.h>
129b35249446b07f40ac5fcc3205f2c048616efacchkuang
139b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "./vp9_rtcd.h"
149b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_common.h"
159b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_loopfilter.h"
169b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_onyxc_int.h"
179b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
189b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h"
199b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h"
209b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h"
219b35249446b07f40ac5fcc3205f2c048616efacchkuang
229b35249446b07f40ac5fcc3205f2c048616efacchkuang#if HAVE_DSPR2
23b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianvoid vp9_lpf_horizontal_16_dspr2(unsigned char *s,
24b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 int pitch,
25b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 const uint8_t *blimit,
26b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 const uint8_t *limit,
27b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 const uint8_t *thresh,
28b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 int count) {
299b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  mask;
309b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  hev, flat, flat2;
319b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint8_t   i;
329b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint8_t   *sp7, *sp6, *sp5, *sp4, *sp3, *sp2, *sp1, *sp0;
339b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint8_t   *sq0, *sq1, *sq2, *sq3, *sq4, *sq5, *sq6, *sq7;
349b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  thresh_vec, flimit_vec, limit_vec;
359b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  uflimit, ulimit, uthresh;
369b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
379b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  p1_f0, p0_f0, q0_f0, q1_f0;
389b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l;
399b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l;
409b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r;
419b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r;
429b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1;
439b35249446b07f40ac5fcc3205f2c048616efacchkuang  uint32_t  q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1;
449b35249446b07f40ac5fcc3205f2c048616efacchkuang
459b35249446b07f40ac5fcc3205f2c048616efacchkuang  uflimit = *blimit;
469b35249446b07f40ac5fcc3205f2c048616efacchkuang  ulimit  = *limit;
479b35249446b07f40ac5fcc3205f2c048616efacchkuang  uthresh = *thresh;
489b35249446b07f40ac5fcc3205f2c048616efacchkuang
499b35249446b07f40ac5fcc3205f2c048616efacchkuang  /* create quad-byte */
509b35249446b07f40ac5fcc3205f2c048616efacchkuang  __asm__ __volatile__ (
519b35249446b07f40ac5fcc3205f2c048616efacchkuang      "replv.qb       %[thresh_vec],    %[uthresh]      \n\t"
529b35249446b07f40ac5fcc3205f2c048616efacchkuang      "replv.qb       %[flimit_vec],    %[uflimit]      \n\t"
539b35249446b07f40ac5fcc3205f2c048616efacchkuang      "replv.qb       %[limit_vec],     %[ulimit]       \n\t"
549b35249446b07f40ac5fcc3205f2c048616efacchkuang
559b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec),
569b35249446b07f40ac5fcc3205f2c048616efacchkuang        [limit_vec] "=r" (limit_vec)
579b35249446b07f40ac5fcc3205f2c048616efacchkuang      : [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit)
589b35249446b07f40ac5fcc3205f2c048616efacchkuang  );
599b35249446b07f40ac5fcc3205f2c048616efacchkuang
609b35249446b07f40ac5fcc3205f2c048616efacchkuang  /* prefetch data for store */
619b35249446b07f40ac5fcc3205f2c048616efacchkuang  vp9_prefetch_store(s);
629b35249446b07f40ac5fcc3205f2c048616efacchkuang
639b35249446b07f40ac5fcc3205f2c048616efacchkuang  for (i = 0; i < (2 * count); i++) {
649b35249446b07f40ac5fcc3205f2c048616efacchkuang    sp7 = s - (pitch << 3);
659b35249446b07f40ac5fcc3205f2c048616efacchkuang    sp6 = sp7 + pitch;
669b35249446b07f40ac5fcc3205f2c048616efacchkuang    sp5 = sp6 + pitch;
679b35249446b07f40ac5fcc3205f2c048616efacchkuang    sp4 = sp5 + pitch;
689b35249446b07f40ac5fcc3205f2c048616efacchkuang    sp3 = sp4 + pitch;
699b35249446b07f40ac5fcc3205f2c048616efacchkuang    sp2 = sp3 + pitch;
709b35249446b07f40ac5fcc3205f2c048616efacchkuang    sp1 = sp2 + pitch;
719b35249446b07f40ac5fcc3205f2c048616efacchkuang    sp0 = sp1 + pitch;
729b35249446b07f40ac5fcc3205f2c048616efacchkuang    sq0 = s;
739b35249446b07f40ac5fcc3205f2c048616efacchkuang    sq1 = s + pitch;
749b35249446b07f40ac5fcc3205f2c048616efacchkuang    sq2 = sq1 + pitch;
759b35249446b07f40ac5fcc3205f2c048616efacchkuang    sq3 = sq2 + pitch;
769b35249446b07f40ac5fcc3205f2c048616efacchkuang    sq4 = sq3 + pitch;
779b35249446b07f40ac5fcc3205f2c048616efacchkuang    sq5 = sq4 + pitch;
789b35249446b07f40ac5fcc3205f2c048616efacchkuang    sq6 = sq5 + pitch;
799b35249446b07f40ac5fcc3205f2c048616efacchkuang    sq7 = sq6 + pitch;
809b35249446b07f40ac5fcc3205f2c048616efacchkuang
819b35249446b07f40ac5fcc3205f2c048616efacchkuang    __asm__ __volatile__ (
829b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[p7],      (%[sp7])            \n\t"
839b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[p6],      (%[sp6])            \n\t"
849b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[p5],      (%[sp5])            \n\t"
859b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[p4],      (%[sp4])            \n\t"
869b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[p3],      (%[sp3])            \n\t"
879b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[p2],      (%[sp2])            \n\t"
889b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[p1],      (%[sp1])            \n\t"
899b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[p0],      (%[sp0])            \n\t"
909b35249446b07f40ac5fcc3205f2c048616efacchkuang
919b35249446b07f40ac5fcc3205f2c048616efacchkuang        : [p3] "=&r" (p3), [p2] "=&r" (p2), [p1] "=&r" (p1), [p0] "=&r" (p0),
929b35249446b07f40ac5fcc3205f2c048616efacchkuang          [p7] "=&r" (p7), [p6] "=&r" (p6), [p5] "=&r" (p5), [p4] "=&r" (p4)
939b35249446b07f40ac5fcc3205f2c048616efacchkuang        : [sp3] "r" (sp3), [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
949b35249446b07f40ac5fcc3205f2c048616efacchkuang          [sp4] "r" (sp4), [sp5] "r" (sp5), [sp6] "r" (sp6), [sp7] "r" (sp7)
959b35249446b07f40ac5fcc3205f2c048616efacchkuang    );
969b35249446b07f40ac5fcc3205f2c048616efacchkuang
979b35249446b07f40ac5fcc3205f2c048616efacchkuang    __asm__ __volatile__ (
989b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[q0],      (%[sq0])            \n\t"
999b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[q1],      (%[sq1])            \n\t"
1009b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[q2],      (%[sq2])            \n\t"
1019b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[q3],      (%[sq3])            \n\t"
1029b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[q4],      (%[sq4])            \n\t"
1039b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[q5],      (%[sq5])            \n\t"
1049b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[q6],      (%[sq6])            \n\t"
1059b35249446b07f40ac5fcc3205f2c048616efacchkuang        "lw     %[q7],      (%[sq7])            \n\t"
1069b35249446b07f40ac5fcc3205f2c048616efacchkuang
1079b35249446b07f40ac5fcc3205f2c048616efacchkuang        : [q3] "=&r" (q3), [q2] "=&r" (q2), [q1] "=&r" (q1), [q0] "=&r" (q0),
1089b35249446b07f40ac5fcc3205f2c048616efacchkuang          [q7] "=&r" (q7), [q6] "=&r" (q6), [q5] "=&r" (q5), [q4] "=&r" (q4)
1099b35249446b07f40ac5fcc3205f2c048616efacchkuang        : [sq3] "r" (sq3), [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0),
1109b35249446b07f40ac5fcc3205f2c048616efacchkuang          [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6), [sq7] "r" (sq7)
1119b35249446b07f40ac5fcc3205f2c048616efacchkuang    );
1129b35249446b07f40ac5fcc3205f2c048616efacchkuang
1139b35249446b07f40ac5fcc3205f2c048616efacchkuang    vp9_filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec,
1149b35249446b07f40ac5fcc3205f2c048616efacchkuang                                        p1, p0, p3, p2, q0, q1, q2, q3,
1159b35249446b07f40ac5fcc3205f2c048616efacchkuang                                        &hev, &mask, &flat);
1169b35249446b07f40ac5fcc3205f2c048616efacchkuang
1179b35249446b07f40ac5fcc3205f2c048616efacchkuang    vp9_flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
1189b35249446b07f40ac5fcc3205f2c048616efacchkuang
1199b35249446b07f40ac5fcc3205f2c048616efacchkuang    /* f0 */
1209b35249446b07f40ac5fcc3205f2c048616efacchkuang    if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
1219b35249446b07f40ac5fcc3205f2c048616efacchkuang        ((flat2 != 0) && (flat == 0) && (mask != 0))) {
1229b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
1239b35249446b07f40ac5fcc3205f2c048616efacchkuang                        &p1_f0, &p0_f0, &q0_f0, &q1_f0);
1249b35249446b07f40ac5fcc3205f2c048616efacchkuang
1259b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
1269b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw       %[p1_f0],   (%[sp1])            \n\t"
1279b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw       %[p0_f0],   (%[sp0])            \n\t"
1289b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw       %[q0_f0],   (%[sq0])            \n\t"
1299b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw       %[q1_f0],   (%[sq1])            \n\t"
1309b35249446b07f40ac5fcc3205f2c048616efacchkuang
1319b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
1329b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
1339b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
1349b35249446b07f40ac5fcc3205f2c048616efacchkuang            [sp1] "r" (sp1), [sp0] "r" (sp0),
1359b35249446b07f40ac5fcc3205f2c048616efacchkuang            [sq0] "r" (sq0), [sq1] "r" (sq1)
1369b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
1379b35249446b07f40ac5fcc3205f2c048616efacchkuang    } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) &&
1389b35249446b07f40ac5fcc3205f2c048616efacchkuang               (mask == 0xFFFFFFFF)) {
1399b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* f2 */
1409b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_LEFT_0TO3()
1419b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_LEFT_4TO7()
1429b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
1439b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &p3_l, &p2_l, &p1_l, &p0_l,
1449b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &q0_l, &q1_l, &q2_l, &q3_l,
1459b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &q4_l, &q5_l, &q6_l, &q7_l);
1469b35249446b07f40ac5fcc3205f2c048616efacchkuang
1479b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_RIGHT_0TO3()
1489b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_RIGHT_4TO7()
1499b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
1509b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &p3_r, &p2_r, &p1_r, &p0_r,
1519b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &q0_r, &q1_r, &q2_r, &q3_r,
1529b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &q4_r, &q5_r, &q6_r, &q7_r);
1539b35249446b07f40ac5fcc3205f2c048616efacchkuang
1549b35249446b07f40ac5fcc3205f2c048616efacchkuang      COMBINE_LEFT_RIGHT_0TO2()
1559b35249446b07f40ac5fcc3205f2c048616efacchkuang      COMBINE_LEFT_RIGHT_3TO6()
1569b35249446b07f40ac5fcc3205f2c048616efacchkuang
1579b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
1589b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p6], (%[sp6])    \n\t"
1599b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p5], (%[sp5])    \n\t"
1609b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p4], (%[sp4])    \n\t"
1619b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p3], (%[sp3])    \n\t"
1629b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p2], (%[sp2])    \n\t"
1639b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p1], (%[sp1])    \n\t"
1649b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p0], (%[sp0])    \n\t"
1659b35249446b07f40ac5fcc3205f2c048616efacchkuang
1669b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
1679b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p6] "r" (p6), [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3),
1689b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0),
1699b35249446b07f40ac5fcc3205f2c048616efacchkuang            [sp6] "r" (sp6), [sp5] "r" (sp5), [sp4] "r" (sp4), [sp3] "r" (sp3),
1709b35249446b07f40ac5fcc3205f2c048616efacchkuang            [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0)
1719b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
1729b35249446b07f40ac5fcc3205f2c048616efacchkuang
1739b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
1749b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q6], (%[sq6])    \n\t"
1759b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q5], (%[sq5])    \n\t"
1769b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q4], (%[sq4])    \n\t"
1779b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q3], (%[sq3])    \n\t"
1789b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q2], (%[sq2])    \n\t"
1799b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q1], (%[sq1])    \n\t"
1809b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q0], (%[sq0])    \n\t"
1819b35249446b07f40ac5fcc3205f2c048616efacchkuang
1829b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
1839b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [q6] "r" (q6), [q5] "r" (q5), [q4] "r" (q4), [q3] "r" (q3),
1849b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q2] "r" (q2), [q1] "r" (q1), [q0] "r" (q0),
1859b35249446b07f40ac5fcc3205f2c048616efacchkuang            [sq6] "r" (sq6), [sq5] "r" (sq5), [sq4] "r" (sq4), [sq3] "r" (sq3),
1869b35249446b07f40ac5fcc3205f2c048616efacchkuang            [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0)
1879b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
1889b35249446b07f40ac5fcc3205f2c048616efacchkuang    } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
1899b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* f1 */
1909b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* left 2 element operation */
1919b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_LEFT_0TO3()
1929b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
1939b35249446b07f40ac5fcc3205f2c048616efacchkuang                         &q0_l, &q1_l, &q2_l, &q3_l);
1949b35249446b07f40ac5fcc3205f2c048616efacchkuang
1959b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* right 2 element operation */
1969b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_RIGHT_0TO3()
1979b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
1989b35249446b07f40ac5fcc3205f2c048616efacchkuang                         &q0_r, &q1_r, &q2_r, &q3_r);
1999b35249446b07f40ac5fcc3205f2c048616efacchkuang
2009b35249446b07f40ac5fcc3205f2c048616efacchkuang      COMBINE_LEFT_RIGHT_0TO2()
2019b35249446b07f40ac5fcc3205f2c048616efacchkuang
2029b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
2039b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p2], (%[sp2])    \n\t"
2049b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p1], (%[sp1])    \n\t"
2059b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[p0], (%[sp0])    \n\t"
2069b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q0], (%[sq0])    \n\t"
2079b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q1], (%[sq1])    \n\t"
2089b35249446b07f40ac5fcc3205f2c048616efacchkuang          "sw         %[q2], (%[sq2])    \n\t"
2099b35249446b07f40ac5fcc3205f2c048616efacchkuang
2109b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
2119b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0),
2129b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0] "r" (q0), [q1] "r" (q1), [q2] "r" (q2),
2139b35249446b07f40ac5fcc3205f2c048616efacchkuang            [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
2149b35249446b07f40ac5fcc3205f2c048616efacchkuang            [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
2159b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
2169b35249446b07f40ac5fcc3205f2c048616efacchkuang    } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
2179b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* f0+f1 */
2189b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
2199b35249446b07f40ac5fcc3205f2c048616efacchkuang                        &p1_f0, &p0_f0, &q0_f0, &q1_f0);
2209b35249446b07f40ac5fcc3205f2c048616efacchkuang
2219b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* left 2 element operation */
2229b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_LEFT_0TO3()
2239b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l,
2249b35249446b07f40ac5fcc3205f2c048616efacchkuang                         &q0_l, &q1_l, &q2_l, &q3_l);
2259b35249446b07f40ac5fcc3205f2c048616efacchkuang
2269b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* right 2 element operation */
2279b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_RIGHT_0TO3()
2289b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r,
2299b35249446b07f40ac5fcc3205f2c048616efacchkuang                         &q0_r, &q1_r, &q2_r, &q3_r);
2309b35249446b07f40ac5fcc3205f2c048616efacchkuang
2319b35249446b07f40ac5fcc3205f2c048616efacchkuang      if (mask & flat & 0x000000FF) {
2329b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
2339b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_r],  (%[sp2])    \n\t"
2349b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_r],  (%[sp1])    \n\t"
2359b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_r],  (%[sp0])    \n\t"
2369b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_r],  (%[sq0])    \n\t"
2379b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_r],  (%[sq1])    \n\t"
2389b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_r],  (%[sq2])    \n\t"
2399b35249446b07f40ac5fcc3205f2c048616efacchkuang
2409b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
2419b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p2_r] "r" (p2_r), [p1_r] "r" (p1_r), [p0_r] "r" (p0_r),
2429b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r),
2439b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
2449b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
2459b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
2469b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & 0x000000FF) {
2479b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
2489b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_f0],  (%[sp1])    \n\t"
2499b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_f0],  (%[sp0])    \n\t"
2509b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_f0],  (%[sq0])    \n\t"
2519b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_f0],  (%[sq1])    \n\t"
2529b35249446b07f40ac5fcc3205f2c048616efacchkuang
2539b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
2549b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
2559b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
2569b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp1] "r" (sp1), [sp0] "r" (sp0),
2579b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1)
2589b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
2599b35249446b07f40ac5fcc3205f2c048616efacchkuang      }
2609b35249446b07f40ac5fcc3205f2c048616efacchkuang
2619b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
2629b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p2_r],    %[p2_r],    16      \n\t"
2639b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p1_r],    %[p1_r],    16      \n\t"
2649b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p0_r],    %[p0_r],    16      \n\t"
2659b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q0_r],    %[q0_r],    16      \n\t"
2669b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q1_r],    %[q1_r],    16      \n\t"
2679b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q2_r],    %[q2_r],    16      \n\t"
2689b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p1_f0],   %[p1_f0],   8       \n\t"
2699b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p0_f0],   %[p0_f0],   8       \n\t"
2709b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q0_f0],   %[q0_f0],   8       \n\t"
2719b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q1_f0],   %[q1_f0],   8       \n\t"
2729b35249446b07f40ac5fcc3205f2c048616efacchkuang
2739b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p2_r] "+r" (p2_r), [p1_r] "+r" (p1_r), [p0_r] "+r" (p0_r),
2749b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_r] "+r" (q0_r), [q1_r] "+r" (q1_r), [q2_r] "+r" (q2_r),
2759b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
2769b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
2779b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
2789b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
2799b35249446b07f40ac5fcc3205f2c048616efacchkuang
2809b35249446b07f40ac5fcc3205f2c048616efacchkuang      if (mask & flat & 0x0000FF00) {
2819b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
2829b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_r],  +1(%[sp2])    \n\t"
2839b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_r],  +1(%[sp1])    \n\t"
2849b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_r],  +1(%[sp0])    \n\t"
2859b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_r],  +1(%[sq0])    \n\t"
2869b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_r],  +1(%[sq1])    \n\t"
2879b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_r],  +1(%[sq2])    \n\t"
2889b35249446b07f40ac5fcc3205f2c048616efacchkuang
2899b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
2909b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p2_r] "r" (p2_r), [p1_r] "r" (p1_r), [p0_r] "r" (p0_r),
2919b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r),
2929b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
2939b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
2949b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
2959b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & 0x0000FF00) {
2969b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
2979b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_f0],  +1(%[sp1])    \n\t"
2989b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_f0],  +1(%[sp0])    \n\t"
2999b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_f0],  +1(%[sq0])    \n\t"
3009b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_f0],  +1(%[sq1])    \n\t"
3019b35249446b07f40ac5fcc3205f2c048616efacchkuang
3029b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
3039b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
3049b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
3059b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp1] "r" (sp1), [sp0] "r" (sp0),
3069b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1)
3079b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
3089b35249446b07f40ac5fcc3205f2c048616efacchkuang      }
3099b35249446b07f40ac5fcc3205f2c048616efacchkuang
3109b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
3119b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p1_f0],   %[p1_f0],   8     \n\t"
3129b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p0_f0],   %[p0_f0],   8     \n\t"
3139b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q0_f0],   %[q0_f0],   8     \n\t"
3149b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q1_f0],   %[q1_f0],   8     \n\t"
3159b35249446b07f40ac5fcc3205f2c048616efacchkuang
3169b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
3179b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
3189b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
3199b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
3209b35249446b07f40ac5fcc3205f2c048616efacchkuang
3219b35249446b07f40ac5fcc3205f2c048616efacchkuang      if (mask & flat & 0x00FF0000) {
3229b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
3239b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_l],  +2(%[sp2])    \n\t"
3249b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_l],  +2(%[sp1])    \n\t"
3259b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_l],  +2(%[sp0])    \n\t"
3269b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_l],  +2(%[sq0])    \n\t"
3279b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_l],  +2(%[sq1])    \n\t"
3289b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_l],  +2(%[sq2])    \n\t"
3299b35249446b07f40ac5fcc3205f2c048616efacchkuang
3309b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
3319b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p2_l] "r" (p2_l), [p1_l] "r" (p1_l), [p0_l] "r" (p0_l),
3329b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l),
3339b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
3349b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
3359b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
3369b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & 0x00FF0000) {
3379b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
3389b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_f0],  +2(%[sp1])    \n\t"
3399b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_f0],  +2(%[sp0])    \n\t"
3409b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_f0],  +2(%[sq0])    \n\t"
3419b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_f0],  +2(%[sq1])    \n\t"
3429b35249446b07f40ac5fcc3205f2c048616efacchkuang
3439b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
3449b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
3459b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
3469b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp1] "r" (sp1), [sp0] "r" (sp0),
3479b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1)
3489b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
3499b35249446b07f40ac5fcc3205f2c048616efacchkuang      }
3509b35249446b07f40ac5fcc3205f2c048616efacchkuang
3519b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
3529b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p2_l],    %[p2_l],    16      \n\t"
3539b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p1_l],    %[p1_l],    16      \n\t"
3549b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p0_l],    %[p0_l],    16      \n\t"
3559b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q0_l],    %[q0_l],    16      \n\t"
3569b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q1_l],    %[q1_l],    16      \n\t"
3579b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q2_l],    %[q2_l],    16      \n\t"
3589b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p1_f0],   %[p1_f0],   8       \n\t"
3599b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p0_f0],   %[p0_f0],   8       \n\t"
3609b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q0_f0],   %[q0_f0],   8       \n\t"
3619b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q1_f0],   %[q1_f0],   8       \n\t"
3629b35249446b07f40ac5fcc3205f2c048616efacchkuang
3639b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p2_l] "+r" (p2_l), [p1_l] "+r" (p1_l), [p0_l] "+r" (p0_l),
3649b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_l] "+r" (q0_l), [q1_l] "+r" (q1_l), [q2_l] "+r" (q2_l),
3659b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
3669b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
3679b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
3689b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
3699b35249446b07f40ac5fcc3205f2c048616efacchkuang
3709b35249446b07f40ac5fcc3205f2c048616efacchkuang      if (mask & flat & 0xFF000000) {
3719b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
3729b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_l],  +3(%[sp2])    \n\t"
3739b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_l],  +3(%[sp1])    \n\t"
3749b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_l],  +3(%[sp0])    \n\t"
3759b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_l],  +3(%[sq0])    \n\t"
3769b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_l],  +3(%[sq1])    \n\t"
3779b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_l],  +3(%[sq2])    \n\t"
3789b35249446b07f40ac5fcc3205f2c048616efacchkuang
3799b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
3809b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p2_l] "r" (p2_l), [p1_l] "r" (p1_l), [p0_l] "r" (p0_l),
3819b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l),
3829b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
3839b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
3849b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
3859b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & 0xFF000000) {
3869b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
3879b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_f0],  +3(%[sp1])    \n\t"
3889b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_f0],  +3(%[sp0])    \n\t"
3899b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_f0],  +3(%[sq0])    \n\t"
3909b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_f0],  +3(%[sq1])    \n\t"
3919b35249446b07f40ac5fcc3205f2c048616efacchkuang
3929b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
3939b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
3949b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
3959b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp1] "r" (sp1), [sp0] "r" (sp0),
3969b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1)
3979b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
3989b35249446b07f40ac5fcc3205f2c048616efacchkuang      }
3999b35249446b07f40ac5fcc3205f2c048616efacchkuang    } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
4009b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* f0 + f1 + f2 */
4019b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* f0  function */
4029b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_filter1_dspr2(mask, hev, p1, p0, q0, q1,
4039b35249446b07f40ac5fcc3205f2c048616efacchkuang                        &p1_f0, &p0_f0, &q0_f0, &q1_f0);
4049b35249446b07f40ac5fcc3205f2c048616efacchkuang
4059b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* f1  function */
4069b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* left 2 element operation */
4079b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_LEFT_0TO3()
4089b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l,
4099b35249446b07f40ac5fcc3205f2c048616efacchkuang                          q0_l, q1_l, q2_l, q3_l,
4109b35249446b07f40ac5fcc3205f2c048616efacchkuang                          &p2_l_f1, &p1_l_f1, &p0_l_f1,
4119b35249446b07f40ac5fcc3205f2c048616efacchkuang                          &q0_l_f1, &q1_l_f1, &q2_l_f1);
4129b35249446b07f40ac5fcc3205f2c048616efacchkuang
4139b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* right 2 element operation */
4149b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_RIGHT_0TO3()
4159b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r,
4169b35249446b07f40ac5fcc3205f2c048616efacchkuang                          q0_r, q1_r, q2_r, q3_r,
4179b35249446b07f40ac5fcc3205f2c048616efacchkuang                          &p2_r_f1, &p1_r_f1, &p0_r_f1,
4189b35249446b07f40ac5fcc3205f2c048616efacchkuang                          &q0_r_f1, &q1_r_f1, &q2_r_f1);
4199b35249446b07f40ac5fcc3205f2c048616efacchkuang
4209b35249446b07f40ac5fcc3205f2c048616efacchkuang      /* f2  function */
4219b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_LEFT_4TO7()
4229b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l,
4239b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &p3_l, &p2_l, &p1_l, &p0_l,
4249b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &q0_l, &q1_l, &q2_l, &q3_l,
4259b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &q4_l, &q5_l, &q6_l, &q7_l);
4269b35249446b07f40ac5fcc3205f2c048616efacchkuang
4279b35249446b07f40ac5fcc3205f2c048616efacchkuang      PACK_RIGHT_4TO7()
4289b35249446b07f40ac5fcc3205f2c048616efacchkuang      vp9_wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r,
4299b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &p3_r, &p2_r, &p1_r, &p0_r,
4309b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &q0_r, &q1_r, &q2_r, &q3_r,
4319b35249446b07f40ac5fcc3205f2c048616efacchkuang                              &q4_r, &q5_r, &q6_r, &q7_r);
4329b35249446b07f40ac5fcc3205f2c048616efacchkuang
4339b35249446b07f40ac5fcc3205f2c048616efacchkuang      if (mask & flat & flat2 & 0x000000FF) {
4349b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
4359b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p6_r],  (%[sp6])    \n\t"
4369b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p5_r],  (%[sp5])    \n\t"
4379b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p4_r],  (%[sp4])    \n\t"
4389b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p3_r],  (%[sp3])    \n\t"
4399b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_r],  (%[sp2])    \n\t"
4409b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_r],  (%[sp1])    \n\t"
4419b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_r],  (%[sp0])    \n\t"
4429b35249446b07f40ac5fcc3205f2c048616efacchkuang
4439b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
4449b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p6_r] "r" (p6_r), [p5_r] "r" (p5_r), [p4_r] "r" (p4_r),
4459b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p3_r] "r" (p3_r), [p2_r] "r" (p2_r), [p1_r] "r" (p1_r),
4469b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp6] "r" (sp6), [sp5] "r" (sp5), [sp4] "r" (sp4),
4479b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp3] "r" (sp3), [sp2] "r" (sp2), [sp1] "r" (sp1),
4489b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p0_r] "r" (p0_r), [sp0] "r" (sp0)
4499b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
4509b35249446b07f40ac5fcc3205f2c048616efacchkuang
4519b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
4529b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_r],  (%[sq0])    \n\t"
4539b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_r],  (%[sq1])    \n\t"
4549b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_r],  (%[sq2])    \n\t"
4559b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q3_r],  (%[sq3])    \n\t"
4569b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q4_r],  (%[sq4])    \n\t"
4579b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q5_r],  (%[sq5])    \n\t"
4589b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q6_r],  (%[sq6])    \n\t"
4599b35249446b07f40ac5fcc3205f2c048616efacchkuang
4609b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
4619b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r),
4629b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q3_r] "r" (q3_r), [q4_r] "r" (q4_r), [q5_r] "r" (q5_r),
4639b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q6_r] "r" (q6_r),
4649b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2),
4659b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq3] "r" (sq3), [sq4] "r" (sq4), [sq5] "r" (sq5),
4669b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq6] "r" (sq6)
4679b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
4689b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & flat & 0x000000FF) {
4699b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
4709b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_r_f1],  (%[sp2])    \n\t"
4719b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_r_f1],  (%[sp1])    \n\t"
4729b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_r_f1],  (%[sp0])    \n\t"
4739b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_r_f1],  (%[sq0])    \n\t"
4749b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_r_f1],  (%[sq1])    \n\t"
4759b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_r_f1],  (%[sq2])    \n\t"
4769b35249446b07f40ac5fcc3205f2c048616efacchkuang
4779b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
4789b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p2_r_f1] "r" (p2_r_f1), [p1_r_f1] "r" (p1_r_f1),
4799b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p0_r_f1] "r" (p0_r_f1), [q0_r_f1] "r" (q0_r_f1),
4809b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q1_r_f1] "r" (q1_r_f1), [q2_r_f1] "r" (q2_r_f1),
4819b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
4829b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
4839b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
4849b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & 0x000000FF) {
4859b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
4869b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_f0],  (%[sp1])    \n\t"
4879b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_f0],  (%[sp0])    \n\t"
4889b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_f0],  (%[sq0])    \n\t"
4899b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_f0],  (%[sq1])    \n\t"
4909b35249446b07f40ac5fcc3205f2c048616efacchkuang
4919b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
4929b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0),
4939b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0),
4949b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1)
4959b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
4969b35249446b07f40ac5fcc3205f2c048616efacchkuang      }
4979b35249446b07f40ac5fcc3205f2c048616efacchkuang
4989b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
4999b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p6_r], %[p6_r], 16     \n\t"
5009b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p5_r], %[p5_r], 16     \n\t"
5019b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p4_r], %[p4_r], 16     \n\t"
5029b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p3_r], %[p3_r], 16     \n\t"
5039b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p2_r], %[p2_r], 16     \n\t"
5049b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p1_r], %[p1_r], 16     \n\t"
5059b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p0_r], %[p0_r], 16     \n\t"
5069b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q0_r], %[q0_r], 16     \n\t"
5079b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q1_r], %[q1_r], 16     \n\t"
5089b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q2_r], %[q2_r], 16     \n\t"
5099b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q3_r], %[q3_r], 16     \n\t"
5109b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q4_r], %[q4_r], 16     \n\t"
5119b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q5_r], %[q5_r], 16     \n\t"
5129b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q6_r], %[q6_r], 16     \n\t"
5139b35249446b07f40ac5fcc3205f2c048616efacchkuang
5149b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [q0_r] "+r" (q0_r), [q1_r] "+r" (q1_r), [q2_r] "+r" (q2_r),
5159b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q3_r] "+r" (q3_r), [q4_r] "+r" (q4_r), [q5_r] "+r" (q5_r),
5169b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p6_r] "+r" (p6_r), [p5_r] "+r" (p5_r), [p4_r] "+r" (p4_r),
5179b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p3_r] "+r" (p3_r), [p2_r] "+r" (p2_r), [p1_r] "+r" (p1_r),
5189b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q6_r] "+r" (q6_r), [p0_r] "+r" (p0_r)
5199b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
5209b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
5219b35249446b07f40ac5fcc3205f2c048616efacchkuang
5229b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
5239b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p2_r_f1], %[p2_r_f1], 16     \n\t"
5249b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p1_r_f1], %[p1_r_f1], 16     \n\t"
5259b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p0_r_f1], %[p0_r_f1], 16     \n\t"
5269b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q0_r_f1], %[q0_r_f1], 16     \n\t"
5279b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q1_r_f1], %[q1_r_f1], 16     \n\t"
5289b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q2_r_f1], %[q2_r_f1], 16     \n\t"
5299b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p1_f0],   %[p1_f0],   8      \n\t"
5309b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p0_f0],   %[p0_f0],   8      \n\t"
5319b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q0_f0],   %[q0_f0],   8      \n\t"
5329b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q1_f0],   %[q1_f0],   8      \n\t"
5339b35249446b07f40ac5fcc3205f2c048616efacchkuang
5349b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p2_r_f1] "+r" (p2_r_f1), [p1_r_f1] "+r" (p1_r_f1),
5359b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p0_r_f1] "+r" (p0_r_f1), [q0_r_f1] "+r" (q0_r_f1),
5369b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q1_r_f1] "+r" (q1_r_f1), [q2_r_f1] "+r" (q2_r_f1),
5379b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
5389b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
5399b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
5409b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
5419b35249446b07f40ac5fcc3205f2c048616efacchkuang
5429b35249446b07f40ac5fcc3205f2c048616efacchkuang      if (mask & flat & flat2 & 0x0000FF00) {
5439b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
5449b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p6_r],  +1(%[sp6])    \n\t"
5459b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p5_r],  +1(%[sp5])    \n\t"
5469b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p4_r],  +1(%[sp4])    \n\t"
5479b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p3_r],  +1(%[sp3])    \n\t"
5489b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_r],  +1(%[sp2])    \n\t"
5499b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_r],  +1(%[sp1])    \n\t"
5509b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_r],  +1(%[sp0])    \n\t"
5519b35249446b07f40ac5fcc3205f2c048616efacchkuang
5529b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
5539b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p6_r] "r" (p6_r), [p5_r] "r" (p5_r), [p4_r] "r" (p4_r),
5549b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p3_r] "r" (p3_r), [p2_r] "r" (p2_r), [p1_r] "r" (p1_r),
5559b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p0_r] "r" (p0_r), [sp6] "r" (sp6), [sp5] "r" (sp5),
5569b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp4] "r" (sp4), [sp3] "r" (sp3),
5579b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0)
5589b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
5599b35249446b07f40ac5fcc3205f2c048616efacchkuang
5609b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
5619b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_r],  +1(%[sq0])    \n\t"
5629b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_r],  +1(%[sq1])    \n\t"
5639b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_r],  +1(%[sq2])    \n\t"
5649b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q3_r],  +1(%[sq3])    \n\t"
5659b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q4_r],  +1(%[sq4])    \n\t"
5669b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q5_r],  +1(%[sq5])    \n\t"
5679b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q6_r],  +1(%[sq6])    \n\t"
5689b35249446b07f40ac5fcc3205f2c048616efacchkuang
5699b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
5709b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r),
5719b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q3_r] "r" (q3_r), [q4_r] "r" (q4_r), [q5_r] "r" (q5_r),
5729b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q6_r] "r" (q6_r), [sq0] "r" (sq0), [sq1] "r" (sq1),
5739b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq2] "r" (sq2), [sq3] "r" (sq3),
5749b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6)
5759b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
5769b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & flat & 0x0000FF00) {
5779b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
5789b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_r_f1],  +1(%[sp2])    \n\t"
5799b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_r_f1],  +1(%[sp1])    \n\t"
5809b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_r_f1],  +1(%[sp0])    \n\t"
5819b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_r_f1],  +1(%[sq0])    \n\t"
5829b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_r_f1],  +1(%[sq1])    \n\t"
5839b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_r_f1],  +1(%[sq2])    \n\t"
5849b35249446b07f40ac5fcc3205f2c048616efacchkuang
5859b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
5869b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p2_r_f1] "r" (p2_r_f1), [p1_r_f1] "r" (p1_r_f1),
5879b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p0_r_f1] "r" (p0_r_f1), [q0_r_f1] "r" (q0_r_f1),
5889b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q1_r_f1] "r" (q1_r_f1), [q2_r_f1] "r" (q2_r_f1),
5899b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
5909b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
5919b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
5929b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & 0x0000FF00) {
5939b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
5949b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_f0],  +1(%[sp1])    \n\t"
5959b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_f0],  +1(%[sp0])    \n\t"
5969b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_f0],  +1(%[sq0])    \n\t"
5979b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_f0],  +1(%[sq1])    \n\t"
5989b35249446b07f40ac5fcc3205f2c048616efacchkuang
5999b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
6009b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0),
6019b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0),
6029b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1)
6039b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
6049b35249446b07f40ac5fcc3205f2c048616efacchkuang      }
6059b35249446b07f40ac5fcc3205f2c048616efacchkuang
6069b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
6079b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p1_f0], %[p1_f0], 8     \n\t"
6089b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[p0_f0], %[p0_f0], 8     \n\t"
6099b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q0_f0], %[q0_f0], 8     \n\t"
6109b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl        %[q1_f0], %[q1_f0], 8     \n\t"
6119b35249446b07f40ac5fcc3205f2c048616efacchkuang
6129b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
6139b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
6149b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
6159b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
6169b35249446b07f40ac5fcc3205f2c048616efacchkuang
6179b35249446b07f40ac5fcc3205f2c048616efacchkuang      if (mask & flat & flat2 & 0x00FF0000) {
6189b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
6199b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p6_l],  +2(%[sp6])    \n\t"
6209b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p5_l],  +2(%[sp5])    \n\t"
6219b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p4_l],  +2(%[sp4])    \n\t"
6229b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p3_l],  +2(%[sp3])    \n\t"
6239b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_l],  +2(%[sp2])    \n\t"
6249b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_l],  +2(%[sp1])    \n\t"
6259b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_l],  +2(%[sp0])    \n\t"
6269b35249446b07f40ac5fcc3205f2c048616efacchkuang
6279b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
6289b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p6_l] "r" (p6_l), [p5_l] "r" (p5_l), [p4_l] "r" (p4_l),
6299b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p3_l] "r" (p3_l), [p2_l] "r" (p2_l), [p1_l] "r" (p1_l),
6309b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p0_l] "r" (p0_l), [sp6] "r" (sp6), [sp5] "r" (sp5),
6319b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp4] "r" (sp4), [sp3] "r" (sp3),
6329b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0)
6339b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
6349b35249446b07f40ac5fcc3205f2c048616efacchkuang
6359b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
6369b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_l],  +2(%[sq0])    \n\t"
6379b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_l],  +2(%[sq1])    \n\t"
6389b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_l],  +2(%[sq2])    \n\t"
6399b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q3_l],  +2(%[sq3])    \n\t"
6409b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q4_l],  +2(%[sq4])    \n\t"
6419b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q5_l],  +2(%[sq5])    \n\t"
6429b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q6_l],  +2(%[sq6])    \n\t"
6439b35249446b07f40ac5fcc3205f2c048616efacchkuang
6449b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
6459b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l),
6469b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q3_l] "r" (q3_l), [q4_l] "r" (q4_l), [q5_l] "r" (q5_l),
6479b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q6_l] "r" (q6_l), [sq0] "r" (sq0), [sq1] "r" (sq1),
6489b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq2] "r" (sq2), [sq3] "r" (sq3),
6499b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6)
6509b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
6519b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & flat & 0x00FF0000) {
6529b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
6539b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p2_l_f1],  +2(%[sp2])    \n\t"
6549b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_l_f1],  +2(%[sp1])    \n\t"
6559b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_l_f1],  +2(%[sp0])    \n\t"
6569b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_l_f1],  +2(%[sq0])    \n\t"
6579b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_l_f1],  +2(%[sq1])    \n\t"
6589b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q2_l_f1],  +2(%[sq2])    \n\t"
6599b35249446b07f40ac5fcc3205f2c048616efacchkuang
6609b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
6619b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p2_l_f1] "r" (p2_l_f1), [p1_l_f1] "r" (p1_l_f1),
6629b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p0_l_f1] "r" (p0_l_f1), [q0_l_f1] "r" (q0_l_f1),
6639b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q1_l_f1] "r" (q1_l_f1), [q2_l_f1] "r" (q2_l_f1),
6649b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
6659b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
6669b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
6679b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & 0x00FF0000) {
6689b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
6699b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p1_f0],  +2(%[sp1])    \n\t"
6709b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[p0_f0],  +2(%[sp0])    \n\t"
6719b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q0_f0],  +2(%[sq0])    \n\t"
6729b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb         %[q1_f0],  +2(%[sq1])    \n\t"
6739b35249446b07f40ac5fcc3205f2c048616efacchkuang
6749b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
6759b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0),
6769b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0),
6779b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1)
6789b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
6799b35249446b07f40ac5fcc3205f2c048616efacchkuang      }
6809b35249446b07f40ac5fcc3205f2c048616efacchkuang
6819b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
6829b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p6_l],    %[p6_l],    16   \n\t"
6839b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p5_l],    %[p5_l],    16   \n\t"
6849b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p4_l],    %[p4_l],    16   \n\t"
6859b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p3_l],    %[p3_l],    16   \n\t"
6869b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p2_l],    %[p2_l],    16   \n\t"
6879b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p1_l],    %[p1_l],    16   \n\t"
6889b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p0_l],    %[p0_l],    16   \n\t"
6899b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q0_l],    %[q0_l],    16   \n\t"
6909b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q1_l],    %[q1_l],    16   \n\t"
6919b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q2_l],    %[q2_l],    16   \n\t"
6929b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q3_l],    %[q3_l],    16   \n\t"
6939b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q4_l],    %[q4_l],    16   \n\t"
6949b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q5_l],    %[q5_l],    16   \n\t"
6959b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q6_l],    %[q6_l],    16   \n\t"
6969b35249446b07f40ac5fcc3205f2c048616efacchkuang
6979b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [q0_l] "+r" (q0_l), [q1_l] "+r" (q1_l), [q2_l] "+r" (q2_l),
6989b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q3_l] "+r" (q3_l), [q4_l] "+r" (q4_l), [q5_l] "+r" (q5_l),
6999b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q6_l] "+r" (q6_l), [p6_l] "+r" (p6_l), [p5_l] "+r" (p5_l),
7009b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p4_l] "+r" (p4_l), [p3_l] "+r" (p3_l), [p2_l] "+r" (p2_l),
7019b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p1_l] "+r" (p1_l), [p0_l] "+r" (p0_l)
7029b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
7039b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
7049b35249446b07f40ac5fcc3205f2c048616efacchkuang
7059b35249446b07f40ac5fcc3205f2c048616efacchkuang      __asm__ __volatile__ (
7069b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p2_l_f1],   %[p2_l_f1],   16   \n\t"
7079b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p1_l_f1],   %[p1_l_f1],   16   \n\t"
7089b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p0_l_f1],   %[p0_l_f1],   16   \n\t"
7099b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q0_l_f1],   %[q0_l_f1],   16   \n\t"
7109b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q1_l_f1],   %[q1_l_f1],   16   \n\t"
7119b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q2_l_f1],   %[q2_l_f1],   16   \n\t"
7129b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p1_f0],     %[p1_f0],     8    \n\t"
7139b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[p0_f0],     %[p0_f0],     8    \n\t"
7149b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q0_f0],     %[q0_f0],     8    \n\t"
7159b35249446b07f40ac5fcc3205f2c048616efacchkuang          "srl      %[q1_f0],     %[q1_f0],     8    \n\t"
7169b35249446b07f40ac5fcc3205f2c048616efacchkuang
7179b35249446b07f40ac5fcc3205f2c048616efacchkuang          : [p2_l_f1] "+r" (p2_l_f1), [p1_l_f1] "+r" (p1_l_f1),
7189b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p0_l_f1] "+r" (p0_l_f1), [q0_l_f1] "+r" (q0_l_f1),
7199b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q1_l_f1] "+r" (q1_l_f1), [q2_l_f1] "+r" (q2_l_f1),
7209b35249446b07f40ac5fcc3205f2c048616efacchkuang            [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0),
7219b35249446b07f40ac5fcc3205f2c048616efacchkuang            [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0)
7229b35249446b07f40ac5fcc3205f2c048616efacchkuang          :
7239b35249446b07f40ac5fcc3205f2c048616efacchkuang      );
7249b35249446b07f40ac5fcc3205f2c048616efacchkuang
7259b35249446b07f40ac5fcc3205f2c048616efacchkuang      if (mask & flat & flat2 & 0xFF000000) {
7269b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
7279b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p6_l],    +3(%[sp6])    \n\t"
7289b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p5_l],    +3(%[sp5])    \n\t"
7299b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p4_l],    +3(%[sp4])    \n\t"
7309b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p3_l],    +3(%[sp3])    \n\t"
7319b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p2_l],    +3(%[sp2])    \n\t"
7329b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p1_l],    +3(%[sp1])    \n\t"
7339b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p0_l],    +3(%[sp0])    \n\t"
7349b35249446b07f40ac5fcc3205f2c048616efacchkuang
7359b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
7369b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p6_l] "r" (p6_l), [p5_l] "r" (p5_l), [p4_l] "r" (p4_l),
7379b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p3_l] "r" (p3_l), [p2_l] "r" (p2_l), [p1_l] "r" (p1_l),
7389b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p0_l] "r" (p0_l), [sp6] "r" (sp6), [sp5] "r" (sp5),
7399b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp4] "r" (sp4), [sp3] "r" (sp3), [sp2] "r" (sp2),
7409b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp1] "r" (sp1), [sp0] "r" (sp0)
7419b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
7429b35249446b07f40ac5fcc3205f2c048616efacchkuang
7439b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
7449b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q0_l],    +3(%[sq0])    \n\t"
7459b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q1_l],    +3(%[sq1])    \n\t"
7469b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q2_l],    +3(%[sq2])    \n\t"
7479b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q3_l],    +3(%[sq3])    \n\t"
7489b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q4_l],    +3(%[sq4])    \n\t"
7499b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q5_l],    +3(%[sq5])    \n\t"
7509b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q6_l],    +3(%[sq6])    \n\t"
7519b35249446b07f40ac5fcc3205f2c048616efacchkuang
7529b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
7539b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [q0_l] "r" (q0_l), [q1_l] "r" (q1_l),
7549b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q2_l] "r" (q2_l), [q3_l] "r" (q3_l),
7559b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q4_l] "r" (q4_l), [q5_l] "r" (q5_l),
7569b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2),
7579b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq3] "r" (sq3), [sq4] "r" (sq4), [sq5] "r" (sq5),
7589b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q6_l] "r" (q6_l), [sq6] "r" (sq6)
7599b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
7609b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & flat & 0xFF000000) {
7619b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
7629b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p2_l_f1],     +3(%[sp2])    \n\t"
7639b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p1_l_f1],     +3(%[sp1])    \n\t"
7649b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p0_l_f1],     +3(%[sp0])    \n\t"
7659b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q0_l_f1],     +3(%[sq0])    \n\t"
7669b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q1_l_f1],     +3(%[sq1])    \n\t"
7679b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q2_l_f1],     +3(%[sq2])    \n\t"
7689b35249446b07f40ac5fcc3205f2c048616efacchkuang
7699b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
7709b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p2_l_f1] "r" (p2_l_f1), [p1_l_f1] "r" (p1_l_f1),
7719b35249446b07f40ac5fcc3205f2c048616efacchkuang              [p0_l_f1] "r" (p0_l_f1), [q0_l_f1] "r" (q0_l_f1),
7729b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q1_l_f1] "r" (q1_l_f1), [q2_l_f1] "r" (q2_l_f1),
7739b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0),
7749b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2)
7759b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
7769b35249446b07f40ac5fcc3205f2c048616efacchkuang      } else if (mask & 0xFF000000) {
7779b35249446b07f40ac5fcc3205f2c048616efacchkuang        __asm__ __volatile__ (
7789b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p1_f0],   +3(%[sp1])    \n\t"
7799b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[p0_f0],   +3(%[sp0])    \n\t"
7809b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q0_f0],   +3(%[sq0])    \n\t"
7819b35249446b07f40ac5fcc3205f2c048616efacchkuang            "sb     %[q1_f0],   +3(%[sq1])    \n\t"
7829b35249446b07f40ac5fcc3205f2c048616efacchkuang
7839b35249446b07f40ac5fcc3205f2c048616efacchkuang            :
7849b35249446b07f40ac5fcc3205f2c048616efacchkuang            : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0),
7859b35249446b07f40ac5fcc3205f2c048616efacchkuang              [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0),
7869b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sp1] "r" (sp1), [sp0] "r" (sp0),
7879b35249446b07f40ac5fcc3205f2c048616efacchkuang              [sq0] "r" (sq0), [sq1] "r" (sq1)
7889b35249446b07f40ac5fcc3205f2c048616efacchkuang        );
7899b35249446b07f40ac5fcc3205f2c048616efacchkuang      }
7909b35249446b07f40ac5fcc3205f2c048616efacchkuang    }
7919b35249446b07f40ac5fcc3205f2c048616efacchkuang
7929b35249446b07f40ac5fcc3205f2c048616efacchkuang    s = s + 4;
7939b35249446b07f40ac5fcc3205f2c048616efacchkuang  }
7949b35249446b07f40ac5fcc3205f2c048616efacchkuang}
7959b35249446b07f40ac5fcc3205f2c048616efacchkuang#endif  // #if HAVE_DSPR2
796