15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/*
25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *
45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  Use of this source code is governed by a BSD-style license
55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  that can be found in the LICENSE file in the root of the source
65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  tree. An additional intellectual property rights grant can be found
75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  in the file PATENTS.  All contributing project authors may
85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang *  be found in the AUTHORS file in the root of the source tree.
95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */
105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h>
125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h>
135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vpx_config.h"
155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vp9_rtcd.h"
165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_common.h"
175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx/vpx_integer.h"
185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx_ports/mem.h"
195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_convolve.h"
205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2
/*
 * Horizontal 8-tap filter for a block 4 pixels wide and h rows tall, written
 * as MIPS DSP inline assembly. Each filtered pixel is passed through the
 * vp9_ff_cropTbl lookup and then averaged (with rounding) with the byte
 * already present in dst; the average is written back to dst.
 *
 * src, src_stride: first source row and the byte distance between rows.
 *                  Each row is read with three unaligned word loads at
 *                  byte offsets 0, 4 and 8.
 * dst, dst_stride: first destination row and the byte distance between rows.
 *                  Bytes 0..3 of every row are read, averaged and rewritten.
 * filter_x0:       filter taps, read here as four 32-bit words (two int16_t
 *                  taps per word).
 * h:               number of rows to process.
 *
 * NOTE(review): "extp" takes its extraction position from the DSPControl
 * register, which this function never writes - presumably the caller sets
 * it up beforehand; confirm.
 */
235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_horiz_4_dspr2(const uint8_t *src,
245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       int32_t src_stride,
255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       uint8_t *dst,
265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       int32_t dst_stride,
275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       const int16_t *filter_x0,
285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       int32_t h) {
295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t y;
  /* Table indexed by each extracted filter sum (see the lbux instructions);
   * presumably a clamp-to-byte table - it is defined outside this block. */
305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *cm = vp9_ff_cropTbl;
315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t  vector1b, vector2b, vector3b, vector4b;
325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t  Temp1, Temp2, Temp3, Temp4;
  /* Value loaded into the low word of every accumulator before the
   * multiply-accumulates; presumably the rounding term of the filter. */
335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t vector4a = 64;
345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t tp1, tp2;
355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t p1, p2, p3, p4;
365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t n1, n2, n3, n4;
375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t tn1, tn2;
385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
  /* Fetch the taps as packed pairs so that each dpa.w.ph below consumes two
   * taps at once.
   * NOTE(review): the cast assumes filter_x0 is 4-byte aligned and reads
   * int16_t storage through an int32_t pointer; confirm callers guarantee
   * the alignment and that the build tolerates this type punning. */
395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector1b = ((const int32_t *)filter_x0)[0];
405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector2b = ((const int32_t *)filter_x0)[1];
415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector3b = ((const int32_t *)filter_x0)[2];
425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector4b = ((const int32_t *)filter_x0)[3];
435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
  /* One iteration per row: y counts down from h to 0. */
445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  for (y = h; y--;) {
455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* prefetch data to cache memory */
    /* The hints target the NEXT row: two source addresses 32 bytes apart
     * and the start of the next destination row. */
465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src + src_stride);
475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src + src_stride + 32);
485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_store(dst + dst_stride);
495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
    /*
     * Output naming used by the comments inside the asm block:
     *   even 1 -> dst[0], odd 1 -> dst[1], even 2 -> dst[2], odd 2 -> dst[3].
     * Two accumulators are used alternately (ac3, ac2, ac3, ac2) so that the
     * table lookup / dst load for one output overlaps the multiply-
     * accumulates of the next. Several registers are recycled for unrelated
     * values once their first use is over (Temp1, tp1, tp2, tn1, tn2, p2,
     * n2), so the instruction order must not be changed casually.
     */
505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    __asm__ __volatile__ (
        /* unaligned loads of source bytes 0..3 and 4..7 */
515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "ulw              %[tp1],         0(%[src])                      \n\t"
525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "ulw              %[tp2],         4(%[src])                      \n\t"
535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* even 1. pixel */
        /* ac3 = 64, then accumulate the four tap-pair dot products.
         * preceu.ph.qbr/qbl zero-extend the two low / two high bytes of a
         * word into a pair of halfwords, giving p1..p4 from tp1 and tp2. */
555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac3                           \n\t"
565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac3                           \n\t"
575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p1],          %[tp1]                         \n\t"
585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[p2],          %[tp1]                         \n\t"
595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p3],          %[tp2]                         \n\t"
605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[p4],          %[tp2]                         \n\t"
615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p1],          %[vector1b]    \n\t"
625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p2],          %[vector2b]    \n\t"
635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p3],          %[vector3b]    \n\t"
        /* source bytes 8..11, needed from the second output onwards; the
         * load is slotted between the multiply-accumulates */
645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "ulw              %[tn2],         8(%[src])                      \n\t"
655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p4],          %[vector4b]    \n\t"
        /* Temp1 = raw filter sum for even 1 (later used as index into cm) */
665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp1],       $ac3,           31             \n\t"
675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* even 2. pixel */
        /* Same taps applied one halfword pair further along: p2, p3, p4 are
         * reused and p1 is refilled from the low bytes of tn2. */
695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac2                           \n\t"
705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac2                           \n\t"
715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p1],          %[tn2]                         \n\t"
        /* Prepare the inputs of the odd outputs: each balign ... 3 replaces
         * the destination with (dest << 24) | (source >> 8). On a
         * little-endian target that turns tp2/tn2/tn1 into the words
         * starting one byte later in the row (src+1, src+5, src+9).
         * NOTE(review): byte order is assumed, not checked, here.
         * tn1 has not been loaded at this point; only its two low bytes
         * (which come from tn2) are consumed later by preceu.ph.qbr. */
725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "balign           %[tn1],         %[tn2],         3              \n\t"
735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "balign           %[tn2],         %[tp2],         3              \n\t"
745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "balign           %[tp2],         %[tp1],         3              \n\t"
755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p2],          %[vector1b]    \n\t"
765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p3],          %[vector2b]    \n\t"
775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p4],          %[vector3b]    \n\t"
785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p1],          %[vector4b]    \n\t"
        /* Temp3 = raw filter sum for even 2 */
795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp3],       $ac2,           31             \n\t"
805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
        /* p2 is free from here on and is reused to hold the old dst[3] */
815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[p2],          3(%[dst])                      \n\t"  /* load odd 2 */
825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* odd 1. pixel */
        /* tp1 <- cm[Temp1] (table lookup of the even 1 sum); Temp1 is then
         * recycled to hold the old dst[1]. n1..n4 are the halfword pairs of
         * the byte-shifted words built by the balign instructions above. */
845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[tp1],         %[Temp1](%[cm])                \n\t"  /* even 1 */
855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac3                           \n\t"
865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac3                           \n\t"
875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[Temp1],       1(%[dst])                      \n\t"  /* load odd 1 */
885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[n1],          %[tp2]                         \n\t"
895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[n2],          %[tp2]                         \n\t"
905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[n3],          %[tn2]                         \n\t"
915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[n4],          %[tn2]                         \n\t"
925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[n1],          %[vector1b]    \n\t"
935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[n2],          %[vector2b]    \n\t"
945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[n3],          %[vector3b]    \n\t"
955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[n4],          %[vector4b]    \n\t"
        /* Temp2 = raw filter sum for odd 1 */
965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp2],       $ac3,           31             \n\t"
975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
        /* tn2 has been unpacked into n3/n4 and is reused for the old dst[0] */
985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[tn2],         0(%[dst])                      \n\t"  /* load even 1 */
995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* odd 2. pixel */
        /* tp2 <- cm[Temp3] (even 2 result). n1 is refilled from tn1 before
         * tn1 itself is overwritten with cm[Temp2] (odd 1 result).
         * addqh_r.w computes the rounded average of its two sources. */
1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[tp2],         %[Temp3](%[cm])                \n\t"  /* even 2 */
1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac2                           \n\t"
1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac2                           \n\t"
1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[n1],          %[tn1]                         \n\t"
1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[tn1],         %[Temp2](%[cm])                \n\t"  /* odd 1 */
1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[tn2],         %[tn2],         %[tp1]         \n\t"  /* average even 1 */
1075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[n2],          %[vector1b]    \n\t"
1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[n3],          %[vector2b]    \n\t"
1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[n4],          %[vector3b]    \n\t"
1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[n1],          %[vector4b]    \n\t"
        /* Temp4 = raw filter sum for odd 2 */
1115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp4],       $ac2,           31             \n\t"
1125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
        /* tp1 (even 1 result) was consumed by the average above and is
         * reused for the old dst[2] */
1135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[tp1],         2(%[dst])                      \n\t"  /* load even 2 */
1145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[tn2],         0(%[dst])                      \n\t"  /* store even 1 */
1155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* clamp */
        /* Remaining work: look up the last sum in cm (into n2), average the
         * three outstanding outputs with their old dst bytes, and store. */
1175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[Temp1],       %[Temp1],       %[tn1]         \n\t"  /* average odd 1 */
1185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[n2],          %[Temp4](%[cm])                \n\t"  /* odd 2 */
1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[Temp1],       1(%[dst])                      \n\t"  /* store odd 1 */
1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[tp1],         %[tp1],         %[tp2]         \n\t"  /* average even 2 */
1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[tp1],         2(%[dst])                      \n\t"  /* store even 2 */
1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[p2],          %[p2],          %[n2]          \n\t"  /* average odd 2 */
1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[p2],          3(%[dst])                      \n\t"  /* store odd 2 */
1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
        /* Every scratch register is an early-clobber output ("=&r") so it is
         * never allocated on top of one of the inputs below.
         * NOTE(review): the statement has no clobber list although it stores
         * to dst (sb) and overwrites accumulators ac2/ac3; confirm whether a
         * "memory" clobber and accumulator clobbers are needed for the
         * compilers in use. */
1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2),
1285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [tn1] "=&r" (tn1), [tn2] "=&r" (tn2),
1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4),
1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4),
1315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
1325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [vector3b] "r" (vector3b), [vector4b] "r" (vector4b),
1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [vector4a] "r" (vector4a),
1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src)
1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    );
1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* Next row... */
1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    src += src_stride;
1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    dst += dst_stride;
1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  }
1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang}
1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_horiz_8_dspr2(const uint8_t *src,
1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       int32_t src_stride,
1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       uint8_t *dst,
1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       int32_t dst_stride,
1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       const int16_t *filter_x0,
1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                       int32_t h) {
1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t y;
1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *cm = vp9_ff_cropTbl;
1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t vector4a = 64;
1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t vector1b, vector2b, vector3b, vector4b;
1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t Temp1, Temp2, Temp3;
1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t tp1, tp2;
1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t p1, p2, p3, p4, n1;
1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t tn1, tn2, tn3;
1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t st0, st1;
1605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector1b = ((const int32_t *)filter_x0)[0];
1625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector2b = ((const int32_t *)filter_x0)[1];
1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector3b = ((const int32_t *)filter_x0)[2];
1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  vector4b = ((const int32_t *)filter_x0)[3];
1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  for (y = h; y--;) {
1675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* prefetch data to cache memory */
1685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src + src_stride);
1695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src + src_stride + 32);
1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_store(dst + dst_stride);
1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    __asm__ __volatile__ (
1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "ulw              %[tp1],         0(%[src])                      \n\t"
1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "ulw              %[tp2],         4(%[src])                      \n\t"
1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* even 1. pixel */
1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac3                           \n\t"
1785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac3                           \n\t"
1795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac2                           \n\t"
1805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac2                           \n\t"
1815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p1],          %[tp1]                         \n\t"
1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[p2],          %[tp1]                         \n\t"
1835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p3],          %[tp2]                         \n\t"
1845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[p4],          %[tp2]                         \n\t"
1855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "ulw              %[tn2],         8(%[src])                      \n\t"
1865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p1],          %[vector1b]    \n\t"
1875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p2],          %[vector2b]    \n\t"
1885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p3],          %[vector3b]    \n\t"
1895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p4],          %[vector4b]    \n\t"
1905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp1],       $ac3,           31             \n\t"
1915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[Temp2],       0(%[dst])                      \n\t"
1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[tn3],         2(%[dst])                      \n\t"
1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* even 2. pixel */
1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p1],          %[tn2]                         \n\t"
1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[n1],          %[tn2]                         \n\t"
1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "ulw              %[tn1],         12(%[src])                     \n\t"
1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p2],          %[vector1b]    \n\t"
1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p3],          %[vector2b]    \n\t"
2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p4],          %[vector3b]    \n\t"
2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p1],          %[vector4b]    \n\t"
2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp3],       $ac2,           31             \n\t"
2035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* even 3. pixel */
2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[st0],         %[Temp1](%[cm])                \n\t"
2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac1                           \n\t"
2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac1                           \n\t"
2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p2],          %[tn1]                         \n\t"
2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[st1],         %[Temp3](%[cm])                \n\t"
2105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac1,           %[p3],          %[vector1b]    \n\t"
2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac1,           %[p4],          %[vector2b]    \n\t"
2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac1,           %[p1],          %[vector3b]    \n\t"
2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac1,           %[n1],          %[vector4b]    \n\t"
2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp1],       $ac1,           31             \n\t"
2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[Temp2],       %[Temp2],       %[st0]         \n\t"
2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[tn3],         %[tn3],         %[st1]         \n\t"
2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[Temp2],       0(%[dst])                      \n\t"
2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[tn3],         2(%[dst])                      \n\t"
2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* even 4. pixel */
2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac2                           \n\t"
2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac2                           \n\t"
2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac3                           \n\t"
2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac3                           \n\t"
2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "balign           %[tn3],         %[tn1],         3              \n\t"
2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "balign           %[tn1],         %[tn2],         3              \n\t"
2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "balign           %[tn2],         %[tp2],         3              \n\t"
2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "balign           %[tp2],         %[tp1],         3              \n\t"
2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[st0],         %[Temp1](%[cm])                \n\t"
2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[Temp2],       4(%[dst])                      \n\t"
2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[Temp2],       %[Temp2],       %[st0]         \n\t"
2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p4],          %[vector1b]    \n\t"
2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p1],          %[vector2b]    \n\t"
2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[n1],          %[vector3b]    \n\t"
2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p2],          %[vector4b]    \n\t"
2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp3],       $ac2,           31             \n\t"
2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* odd 1. pixel */
2435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac1                           \n\t"
2445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac1                           \n\t"
2455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[Temp2],       4(%[dst])                      \n\t"
2465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p1],          %[tp2]                         \n\t"
2475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[p2],          %[tp2]                         \n\t"
2485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p3],          %[tn2]                         \n\t"
2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[p4],          %[tn2]                         \n\t"
2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p1],          %[vector1b]    \n\t"
2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p2],          %[vector2b]    \n\t"
2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p3],          %[vector3b]    \n\t"
2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p4],          %[vector4b]    \n\t"
2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp2],       $ac3,           31             \n\t"
2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[tp1],         6(%[dst])                      \n\t"
2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* odd 2. pixel */
2595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac3                           \n\t"
2605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac3                           \n\t"
2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mtlo             %[vector4a],    $ac2                           \n\t"
2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "mthi             $zero,          $ac2                           \n\t"
2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p1],          %[tn1]                         \n\t"
2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbl    %[n1],          %[tn1]                         \n\t"
2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[st0],         %[Temp3](%[cm])                \n\t"
2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac1,           %[p2],          %[vector1b]    \n\t"
2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac1,           %[p3],          %[vector2b]    \n\t"
2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac1,           %[p4],          %[vector3b]    \n\t"
2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac1,           %[p1],          %[vector4b]    \n\t"
2705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp3],       $ac1,           31             \n\t"
2715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[tp2],         1(%[dst])                      \n\t"
2735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[tn2],         3(%[dst])                      \n\t"
2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[tp1],         %[tp1],         %[st0]         \n\t"
2755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* odd 3. pixel */
2775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[st1],         %[Temp2](%[cm])                \n\t"
2785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "preceu.ph.qbr    %[p2],          %[tn3]                         \n\t"
2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p3],          %[vector1b]    \n\t"
2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p4],          %[vector2b]    \n\t"
2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[p1],          %[vector3b]    \n\t"
2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac3,           %[n1],          %[vector4b]    \n\t"
2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[tp2],         %[tp2],         %[st1]         \n\t"
2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp2],       $ac3,           31             \n\t"
2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[tn3],         5(%[dst])                      \n\t"
2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* odd 4. pixel */
2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[tp2],         1(%[dst])                      \n\t"
2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[tp1],         6(%[dst])                      \n\t"
2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p4],          %[vector1b]    \n\t"
2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p1],          %[vector2b]    \n\t"
2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[n1],          %[vector3b]    \n\t"
2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "dpa.w.ph         $ac2,           %[p2],          %[vector4b]    \n\t"
2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "extp             %[Temp1],       $ac2,           31             \n\t"
2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbu              %[tn1],         7(%[dst])                      \n\t"
2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* clamp */
3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[p4],          %[Temp3](%[cm])                \n\t"
3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[tn2],         %[tn2],         %[p4]          \n\t"
3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[p2],          %[Temp2](%[cm])                \n\t"
3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[tn3],         %[tn3],         %[p2]          \n\t"
3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "lbux             %[n1],          %[Temp1](%[cm])                \n\t"
3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "addqh_r.w        %[tn1],         %[tn1],         %[n1]          \n\t"
3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        /* store bytes */
3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[tn2],         3(%[dst])                      \n\t"
3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[tn3],         5(%[dst])                      \n\t"
3125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        "sb               %[tn1],         7(%[dst])                      \n\t"
3135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2),
3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [tn1] "=&r" (tn1), [tn2] "=&r" (tn2), [tn3] "=&r" (tn3),
3165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [st0] "=&r" (st0), [st1] "=&r" (st1),
3175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4),
3185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [n1] "=&r" (n1),
3195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3)
3205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang        : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
3215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [vector3b] "r" (vector3b), [vector4b] "r" (vector4b),
3225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [vector4a] "r" (vector4a),
3235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src)
3245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    );
3255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* Next row... */
3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    src += src_stride;
3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    dst += dst_stride;
3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  }
3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang}
3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_horiz_16_dspr2(const uint8_t *src_ptr,
3335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        int32_t src_stride,
3345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        uint8_t *dst_ptr,
3355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        int32_t dst_stride,
3365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        const int16_t *filter_x0,
3375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        int32_t h,
3385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        int32_t count) {
3395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t y, c;
3405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const uint8_t *src;
3415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *dst;
3425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *cm = vp9_ff_cropTbl;
3435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t vector_64 = 64;
3445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t filter12, filter34, filter56, filter78;
3455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t Temp1, Temp2, Temp3;
3465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t qload1, qload2, qload3;
3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t p1, p2, p3, p4, p5;
3485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t st1, st2, st3;
3495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  filter12 = ((const int32_t *)filter_x0)[0];
3515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  filter34 = ((const int32_t *)filter_x0)[1];
3525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  filter56 = ((const int32_t *)filter_x0)[2];
3535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  filter78 = ((const int32_t *)filter_x0)[3];
3545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  for (y = h; y--;) {
3565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    src = src_ptr;
3575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    dst = dst_ptr;
3585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* prefetch data to cache memory */
3605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src_ptr + src_stride);
3615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src_ptr + src_stride + 32);
3625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_store(dst_ptr + dst_stride);
3635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    for (c = 0; c < count; c++) {
3655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      __asm__ __volatile__ (
3665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload1],    0(%[src])                    \n\t"
3675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload2],    4(%[src])                    \n\t"
3685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 1. pixel */
3705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* even 1 */
3715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* even 2 */
3735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
3785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload3],    8(%[src])                    \n\t"
3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter12]  \n\t" /* even 1 */
3805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter34]  \n\t" /* even 1 */
3815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter56]  \n\t" /* even 1 */
3825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter78]  \n\t" /* even 1 */
3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* even 1 */
3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[st2],       0(%[dst])                    \n\t" /* load even 1 from dst */
3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 2. pixel */
3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* even 3 */
3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
3895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
3905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
3915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload1],    12(%[src])                   \n\t"
3925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[filter12]  \n\t" /* even 1 */
3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter34]  \n\t" /* even 1 */
3945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p4],          %[filter56]  \n\t" /* even 1 */
3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[filter78]  \n\t" /* even 1 */
3965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* even 1 */
3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 1 */
3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
3995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    2(%[dst])                    \n\t" /* load even 2 from dst */
4005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 3. pixel */
4025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* even 4 */
4035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
4045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 1 */
4055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
4065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[st2],       0(%[dst])                    \n\t" /* store even 1 to dst */
4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p3],          %[filter12]  \n\t" /* even 3 */
4085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter34]  \n\t" /* even 3 */
4095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p1],          %[filter56]  \n\t" /* even 3 */
4105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p5],          %[filter78]  \n\t" /* even 3 */
4115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* even 3 */
4125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 1 */
4135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 4. pixel */
4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* even 5 */
4165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
4175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st2]       \n\t" /* average even 2 */
4185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    2(%[dst])                    \n\t" /* store even 2 to dst */
4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload2],    16(%[src])                   \n\t"
4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    4(%[dst])                    \n\t" /* load even 3 from dst */
4225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload1],    6(%[dst])                    \n\t" /* load even 4 from dst */
4235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter12]  \n\t" /* even 4 */
4245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter34]  \n\t" /* even 4 */
4255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p5],          %[filter56]  \n\t" /* even 4 */
4265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter78]  \n\t" /* even 4 */
4275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* even 4 */
4285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 3 */
4295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 5. pixel */
4315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* even 6 */
4325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
4335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 3 */
4345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p4],        %[qload2]                    \n\t"
4355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    4(%[dst])                    \n\t" /* store even 3 to dst */
4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[filter12]  \n\t" /* even 5 */
4375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p5],          %[filter34]  \n\t" /* even 5 */
4385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[filter56]  \n\t" /* even 5 */
4395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter78]  \n\t" /* even 5 */
4405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* even 5 */
4415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 4 */
4425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 6. pixel */
4445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* even 7 */
4455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
4465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average even 4 */
4475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p1],        %[qload2]                    \n\t"
4485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload1],    6(%[dst])                    \n\t" /* store even 4 to dst */
4495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload3],    20(%[src])                   \n\t"
4505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p5],          %[filter12]  \n\t" /* even 6 */
4515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p2],          %[filter34]  \n\t" /* even 6 */
4525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p3],          %[filter56]  \n\t" /* even 6 */
4535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter78]  \n\t" /* even 6 */
4545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload2],    8(%[dst])                    \n\t" /* load even 5 from dst */
4555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* even 6 */
4565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 5 */
4575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 7. pixel */
4595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* even 8 */
4605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
4615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 5 */
4625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p5],        %[qload3]                    \n\t"
4635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload2],    8(%[dst])                    \n\t" /* store even 5 to dst */
4645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter12]  \n\t" /* even 7 */
4655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter34]  \n\t" /* even 7 */
4665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter56]  \n\t" /* even 7 */
4675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter78]  \n\t" /* even 7 */
4685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    10(%[dst])                   \n\t" /* load even 6 from dst */
4695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* even 7 */
4705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 6 */
4715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[st2],       12(%[dst])                   \n\t" /* load even 7 from dst */
4735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 8. pixel */
4755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* odd 1 */
4765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
4775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 6 */
4785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter12]  \n\t" /* even 8 */
4795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p4],          %[filter34]  \n\t" /* even 8 */
4805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    10(%[dst])                   \n\t" /* store even 6 to dst */
4815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[filter56]  \n\t" /* even 8 */
4825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p5],          %[filter78]  \n\t" /* even 8 */
4835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* even 8 */
4845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 7 */
4855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* ODD pixels */
4875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload1],    1(%[src])                   \n\t"
4885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload2],    5(%[src])                    \n\t"
4895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 7 */
4915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
4925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 1. pixel */
4935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* odd 2 */
4945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
4955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
4965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
4975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
4985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
4995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[st2],       12(%[dst])                   \n\t" /* store even 7 to dst */
5005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload3],    9(%[src])                    \n\t"
5015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p1],          %[filter12]  \n\t" /* odd 1 */
5025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p2],          %[filter34]  \n\t" /* odd 1 */
5035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload2],    14(%[dst])                   \n\t" /* load even 8 from dst */
5045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p3],          %[filter56]  \n\t" /* odd 1 */
5055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter78]  \n\t" /* odd 1 */
5065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 1 */
5075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 8 */
5085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[st1],       1(%[dst])                    \n\t" /* load odd 1 from dst */
5105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 2. pixel */
5125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* odd 3 */
5135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
5145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 8 */
5155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
5165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
5175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload2],    14(%[dst])                   \n\t" /* store even 8 to dst */
5185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload1],    13(%[src])                   \n\t"
5195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter12]  \n\t" /* odd 2 */
5205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter34]  \n\t" /* odd 2 */
5215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter56]  \n\t" /* odd 2 */
5225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter78]  \n\t" /* odd 2 */
5235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    3(%[dst])                    \n\t" /* load odd 2 from dst */
5245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 2 */
5255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 1 */
5265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 3. pixel */
5285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* odd 4 */
5295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
5305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[st3],       %[st3],         %[st1]       \n\t" /* average odd 1 */
5315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
5325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter12]  \n\t" /* odd 3 */
5335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p4],          %[filter34]  \n\t" /* odd 3 */
5345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[filter56]  \n\t" /* odd 3 */
5355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p5],          %[filter78]  \n\t" /* odd 3 */
5365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[st3],       1(%[dst])                    \n\t" /* store odd 1 to dst */
5375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 3 */
5385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 2 */
5395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 4. pixel */
5415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* odd 5 */
5425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
5435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st1]       \n\t" /* average odd 2 */
5445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
5455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    3(%[dst])                    \n\t" /* store odd 2 to dst */
5465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload1],    5(%[dst])                    \n\t" /* load odd 3 from dst */
5475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload2],    17(%[src])                   \n\t"
5485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter12]  \n\t" /* odd 4 */
5495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p1],          %[filter34]  \n\t" /* odd 4 */
5505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p5],          %[filter56]  \n\t" /* odd 4 */
5515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p2],          %[filter78]  \n\t" /* odd 4 */
5525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 4 */
5535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 3 */
5545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[st1],       7(%[dst])                    \n\t" /* load odd 4 from dst */
5565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 5. pixel */
5585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* odd 6 */
5595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
5605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload1],    %[qload1],      %[st2]       \n\t" /* average odd 3 */
5615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p4],        %[qload2]                    \n\t"
5625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload1],    5(%[dst])                    \n\t" /* store odd 3 to dst */
5635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter12]  \n\t" /* odd 5 */
5645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p5],          %[filter34]  \n\t" /* odd 5 */
5655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter56]  \n\t" /* odd 5 */
5665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter78]  \n\t" /* odd 5 */
5675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 5 */
5685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 4 */
5695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload1],    9(%[dst])                    \n\t" /* load odd 5 from dst */
5715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 6. pixel */
5735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* odd 7 */
5745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
5755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[st1],       %[st1],         %[st3]       \n\t" /* average odd 4 */
5765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p1],        %[qload2]                    \n\t"
5775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[st1],       7(%[dst])                    \n\t" /* store odd 4 to dst */
5785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload3],    21(%[src])                   \n\t"
5795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p5],          %[filter12]  \n\t" /* odd 6 */
5805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[filter34]  \n\t" /* odd 6 */
5815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter56]  \n\t" /* odd 6 */
5825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p4],          %[filter78]  \n\t" /* odd 6 */
5835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 6 */
5845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 5 */
5855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 7. pixel */
5875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* odd 8 */
5885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
5895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 5 */
5905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p5],        %[qload3]                    \n\t"
5915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload1],    9(%[dst])                    \n\t" /* store odd 5 to dst */
5925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload2],    11(%[dst])                   \n\t" /* load odd 6 from dst */
5935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p2],          %[filter12]  \n\t" /* odd 7 */
5945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p3],          %[filter34]  \n\t" /* odd 7 */
5955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter56]  \n\t" /* odd 7 */
5965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p1],          %[filter78]  \n\t" /* odd 7 */
5975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 7 */
5985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
5995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    13(%[dst])                   \n\t" /* load odd 7 from dst */
6005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 8. pixel */
6025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter12]  \n\t" /* odd 8 */
6035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter34]  \n\t" /* odd 8 */
6045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter56]  \n\t" /* odd 8 */
6055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p5],          %[filter78]  \n\t" /* odd 8 */
6065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 8 */
6075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload1],    15(%[dst])                   \n\t" /* load odd 8 from dst */
6095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 6 */
6115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average odd 6 */
6125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 7 */
6145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average odd 7 */
6155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 8 */
6175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 8 */
6185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload2],    11(%[dst])                   \n\t" /* store odd 6 to dst */
6205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    13(%[dst])                   \n\t" /* store odd 7 to dst */
6215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload1],    15(%[dst])                   \n\t" /* store odd 8 to dst */
6225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          : [qload1] "=&r" (qload1), [qload2] "=&r" (qload2),
6245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [st1] "=&r" (st1), [st2] "=&r" (st2), [st3] "=&r" (st3),
6255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4),
6265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [qload3] "=&r" (qload3), [p5] "=&r" (p5),
6275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3)
6285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          : [filter12] "r" (filter12), [filter34] "r" (filter34),
6295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [filter56] "r" (filter56), [filter78] "r" (filter78),
6305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [vector_64] "r" (vector_64),
6315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src)
6325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      );
6335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      src += 16;
6355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      dst += 16;
6365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    }
6375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* Next row... */
6395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    src_ptr += src_stride;
6405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    dst_ptr += dst_stride;
6415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  }
6425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang}
6435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_horiz_64_dspr2(const uint8_t *src_ptr,
6455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        int32_t src_stride,
6465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        uint8_t *dst_ptr,
6475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        int32_t dst_stride,
6485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        const int16_t *filter_x0,
6495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                        int32_t h) {
6505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t y, c;
6515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const uint8_t *src;
6525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *dst;
6535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint8_t *cm = vp9_ff_cropTbl;
6545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t vector_64 = 64;
6555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t filter12, filter34, filter56, filter78;
6565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int32_t Temp1, Temp2, Temp3;
6575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t qload1, qload2, qload3;
6585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t p1, p2, p3, p4, p5;
6595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  uint32_t st1, st2, st3;
6605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  filter12 = ((const int32_t *)filter_x0)[0];
6625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  filter34 = ((const int32_t *)filter_x0)[1];
6635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  filter56 = ((const int32_t *)filter_x0)[2];
6645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  filter78 = ((const int32_t *)filter_x0)[3];
6655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  for (y = h; y--;) {
6675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    src = src_ptr;
6685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    dst = dst_ptr;
6695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* prefetch data to cache memory */
6715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src_ptr + src_stride);
6725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src_ptr + src_stride + 32);
6735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_load(src_ptr + src_stride + 64);
6745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_store(dst_ptr + dst_stride);
6755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    vp9_prefetch_store(dst_ptr + dst_stride + 32);
6765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    for (c = 0; c < 4; c++) {
6785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      __asm__ __volatile__ (
6795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload1],    0(%[src])                    \n\t"
6805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload2],    4(%[src])                    \n\t"
6815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 1. pixel */
6835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* even 1 */
6845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
6855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* even 2 */
6865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
6875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
6885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
6895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
6905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
6915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload3],    8(%[src])                    \n\t"
6925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter12]  \n\t" /* even 1 */
6935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter34]  \n\t" /* even 1 */
6945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter56]  \n\t" /* even 1 */
6955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter78]  \n\t" /* even 1 */
6965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* even 1 */
6975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[st2],       0(%[dst])                    \n\t" /* load even 1 from dst */
6985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
6995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 2. pixel */
7005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* even 3 */
7015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
7025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
7035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
7045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload1],    12(%[src])                   \n\t"
7055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[filter12]  \n\t" /* even 2 */
7065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter34]  \n\t" /* even 2 */
7075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p4],          %[filter56]  \n\t" /* even 2 */
7085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[filter78]  \n\t" /* even 2 */
7095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* even 2 */
7105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 1 */
7115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    2(%[dst])                    \n\t" /* load even 2 from dst */
7135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 3. pixel */
7155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* even 4 */
7165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
7175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 1 */
7185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
7195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[st2],       0(%[dst])                    \n\t" /* store even 1 to dst */
7205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p3],          %[filter12]  \n\t" /* even 3 */
7215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter34]  \n\t" /* even 3 */
7225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p1],          %[filter56]  \n\t" /* even 3 */
7235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p5],          %[filter78]  \n\t" /* even 3 */
7245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* even 3 */
7255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 2 */
7265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 4. pixel */
7285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* even 5 */
7295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
7305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st2]       \n\t" /* average even 2 */
7315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
7325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    2(%[dst])                    \n\t" /* store even 2 to dst */
7335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload2],    16(%[src])                   \n\t"
7345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    4(%[dst])                    \n\t" /* load even 3 from dst */
7355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload1],    6(%[dst])                    \n\t" /* load even 4 from dst */
7365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter12]  \n\t" /* even 4 */
7375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter34]  \n\t" /* even 4 */
7385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p5],          %[filter56]  \n\t" /* even 4 */
7395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter78]  \n\t" /* even 4 */
7405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* even 4 */
7415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 3 */
7425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 5. pixel */
7445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* even 6 */
7455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
7465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 3 */
7475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p4],        %[qload2]                    \n\t"
7485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    4(%[dst])                    \n\t" /* store even 3 to dst */
7495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[filter12]  \n\t" /* even 5 */
7505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p5],          %[filter34]  \n\t" /* even 5 */
7515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[filter56]  \n\t" /* even 5 */
7525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter78]  \n\t" /* even 5 */
7535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* even 5 */
7545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 4 */
7555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 6. pixel */
7575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* even 7 */
7585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
7595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average even 4 */
7605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p1],        %[qload2]                    \n\t"
7615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload1],    6(%[dst])                    \n\t" /* store even 4 to dst */
7625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload3],    20(%[src])                   \n\t"
7635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p5],          %[filter12]  \n\t" /* even 6 */
7645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p2],          %[filter34]  \n\t" /* even 6 */
7655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p3],          %[filter56]  \n\t" /* even 6 */
7665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter78]  \n\t" /* even 6 */
7675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload2],    8(%[dst])                    \n\t" /* load even 5 from dst */
7685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* even 6 */
7695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 5 */
7705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 7. pixel */
7725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* even 8 */
7735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
7745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 5 */
7755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p5],        %[qload3]                    \n\t"
7765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload2],    8(%[dst])                    \n\t" /* store even 5 to dst */
7775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter12]  \n\t" /* even 7 */
7785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter34]  \n\t" /* even 7 */
7795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter56]  \n\t" /* even 7 */
7805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter78]  \n\t" /* even 7 */
7815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    10(%[dst])                   \n\t" /* load even 6 from dst */
7825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* even 7 */
7835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* even 6 */
7845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[st2],       12(%[dst])                   \n\t" /* load even 7 from dst */
7865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* even 8. pixel */
7885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* odd 1 */
7895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
7905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average even 6 */
7915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter12]  \n\t" /* even 8 */
7925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p4],          %[filter34]  \n\t" /* even 8 */
7935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    10(%[dst])                   \n\t" /* store even 6 to dst */
7945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[filter56]  \n\t" /* even 8 */
7955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p5],          %[filter78]  \n\t" /* even 8 */
7965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* even 8 */
7975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* even 7 */
7985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
7995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* ODD pixels */
8005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload1],    1(%[src])                   \n\t"
8015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload2],    5(%[src])                    \n\t"
8025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[st2],       %[st2],         %[st1]       \n\t" /* average even 7 */
8045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 1. pixel */
8065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* odd 2 */
8075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
8085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[qload1]                    \n\t"
8095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p2],        %[qload1]                    \n\t"
8105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p3],        %[qload2]                    \n\t"
8115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p4],        %[qload2]                    \n\t"
8125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[st2],       12(%[dst])                   \n\t" /* store even 7 to dst */
8135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload3],    9(%[src])                    \n\t"
8145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p1],          %[filter12]  \n\t" /* odd 1 */
8155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p2],          %[filter34]  \n\t" /* odd 1 */
8165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload2],    14(%[dst])                   \n\t" /* load even 8 from dst */
8175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p3],          %[filter56]  \n\t" /* odd 1 */
8185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter78]  \n\t" /* odd 1 */
8195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 1 */
8205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* even 8 */
8215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[st1],       1(%[dst])                    \n\t" /* load odd 1 from dst */
8235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 2. pixel */
8255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* odd 3 */
8265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
8275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average even 8 */
8285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p1],        %[qload3]                    \n\t"
8295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p5],        %[qload3]                    \n\t"
8305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload2],    14(%[dst])                   \n\t" /* store even 8 to dst */
8315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload1],    13(%[src])                   \n\t"
8325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter12]  \n\t" /* odd 2 */
8335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter34]  \n\t" /* odd 2 */
8345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter56]  \n\t" /* odd 2 */
8355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter78]  \n\t" /* odd 2 */
8365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    3(%[dst])                    \n\t" /* load odd 2 from dst */
8375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 2 */
8385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 1 */
8395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 3. pixel */
8415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* odd 4 */
8425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
8435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[st3],       %[st3],         %[st1]       \n\t" /* average odd 1 */
8445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p2],        %[qload1]                    \n\t"
8455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter12]  \n\t" /* odd 3 */
8465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p4],          %[filter34]  \n\t" /* odd 3 */
8475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p1],          %[filter56]  \n\t" /* odd 3 */
8485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p5],          %[filter78]  \n\t" /* odd 3 */
8495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[st3],       1(%[dst])                    \n\t" /* store odd 1 to dst */
8505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 3 */
8515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 2 */
8525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 4. pixel */
8545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* odd 5 */
8555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
8565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st1]       \n\t" /* average odd 2 */
8575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p3],        %[qload1]                    \n\t"
8585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    3(%[dst])                    \n\t" /* store odd 2 to dst */
8595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload1],    5(%[dst])                    \n\t" /* load odd 3 from dst */
8605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload2],    17(%[src])                   \n\t"
8615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter12]  \n\t" /* odd 4 */
8625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p1],          %[filter34]  \n\t" /* odd 4 */
8635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p5],          %[filter56]  \n\t" /* odd 4 */
8645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p2],          %[filter78]  \n\t" /* odd 4 */
8655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 4 */
8665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 3 */
8675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[st1],       7(%[dst])                    \n\t" /* load odd 4 from dst */
8695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 5. pixel */
8715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac2                         \n\t" /* odd 6 */
8725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac2                         \n\t"
8735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload1],    %[qload1],      %[st2]       \n\t" /* average odd 3 */
8745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p4],        %[qload2]                    \n\t"
8755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload1],    5(%[dst])                    \n\t" /* store odd 3 to dst */
8765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter12]  \n\t" /* odd 5 */
8775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p5],          %[filter34]  \n\t" /* odd 5 */
8785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p2],          %[filter56]  \n\t" /* odd 5 */
8795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter78]  \n\t" /* odd 5 */
8805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 5 */
8815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 4 */
8825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload1],    9(%[dst])                    \n\t" /* load odd 5 from dst */
8845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 6. pixel */
8865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac3                         \n\t" /* odd 7 */
8875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac3                         \n\t"
8885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[st1],       %[st1],         %[st3]       \n\t" /* average odd 4 */
8895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbl    %[p1],        %[qload2]                    \n\t"
8905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[st1],       7(%[dst])                    \n\t" /* store odd 4 to dst */
8915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "ulw              %[qload3],    21(%[src])                   \n\t"
8925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p5],          %[filter12]  \n\t" /* odd 6 */
8935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p2],          %[filter34]  \n\t" /* odd 6 */
8945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p3],          %[filter56]  \n\t" /* odd 6 */
8955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac2,         %[p4],          %[filter78]  \n\t" /* odd 6 */
8965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp2],     $ac2,           31           \n\t" /* odd 6 */
8975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 5 */
8985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
8995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 7. pixel */
9005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mtlo             %[vector_64], $ac1                         \n\t" /* odd 8 */
9015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "mthi             $zero,        $ac1                         \n\t"
9025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 5 */
9035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "preceu.ph.qbr    %[p5],        %[qload3]                    \n\t"
9045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload1],    9(%[dst])                    \n\t" /* store odd 5 to dst */
9055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload2],    11(%[dst])                   \n\t" /* load odd 6 from dst */
9065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p2],          %[filter12]  \n\t" /* odd 7 */
9075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p3],          %[filter34]  \n\t" /* odd 7 */
9085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p4],          %[filter56]  \n\t" /* odd 7 */
9095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac3,         %[p1],          %[filter78]  \n\t" /* odd 7 */
9105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp3],     $ac3,           31           \n\t" /* odd 7 */
9115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload3],    13(%[dst])                   \n\t" /* load odd 7 from dst */
9135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          /* odd 8. pixel */
9155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p3],          %[filter12]  \n\t" /* odd 8 */
9165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p4],          %[filter34]  \n\t" /* odd 8 */
9175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p1],          %[filter56]  \n\t" /* odd 8 */
9185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "dpa.w.ph         $ac1,         %[p5],          %[filter78]  \n\t" /* odd 8 */
9195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "extp             %[Temp1],     $ac1,           31           \n\t" /* odd 8 */
9205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbu              %[qload1],    15(%[dst])                   \n\t" /* load odd 8 from dst */
9225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st2],       %[Temp2](%[cm])              \n\t" /* odd 6 */
9245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload2],    %[qload2],      %[st2]       \n\t" /* average odd 6 */
9255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st3],       %[Temp3](%[cm])              \n\t" /* odd 7 */
9275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload3],    %[qload3],      %[st3]       \n\t" /* average odd 7 */
9285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "lbux             %[st1],       %[Temp1](%[cm])              \n\t" /* odd 8 */
9305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "addqh_r.w        %[qload1],    %[qload1],      %[st1]       \n\t" /* average odd 8 */
9315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload2],    11(%[dst])                   \n\t" /* store odd 6 to dst */
9335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload3],    13(%[dst])                   \n\t" /* store odd 7 to dst */
9345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          "sb               %[qload1],    15(%[dst])                   \n\t" /* store odd 8 to dst */
9355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          : [qload1] "=&r" (qload1), [qload2] "=&r" (qload2),
9375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [st1] "=&r" (st1), [st2] "=&r" (st2), [st3] "=&r" (st3),
9385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4),
9395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [qload3] "=&r" (qload3), [p5] "=&r" (p5),
9405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3)
9415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang          : [filter12] "r" (filter12), [filter34] "r" (filter34),
9425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [filter56] "r" (filter56), [filter78] "r" (filter78),
9435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [vector_64] "r" (vector_64),
9445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang            [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src)
9455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      );
9465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      src += 16;
9485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      dst += 16;
9495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    }
9505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
9515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    /* Next row... */
9525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    src_ptr += src_stride;
9535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    dst_ptr += dst_stride;
9545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  }
9555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang}
9565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
/*
 * Horizontal 8-tap convolution whose result is averaged into dst
 * (DSPr2 entry point).
 *
 * Dispatch order:
 *   1. x_step_q4 != 16 (scaled prediction): defer to the C implementation.
 *      This must be decided before looking at the filter shape, because the
 *      shortcuts below only inspect the starting kernel and would otherwise
 *      skip the resampling that a scaled call requires.
 *   2. Second 32-bit word of filter_x equals 0x800000 (the code treats this
 *      as a pass-through kernel): plain averaging via vp9_convolve_avg.
 *   3. First 32-bit word of filter_x is zero: vp9_convolve2_avg_horiz_dspr2.
 *   4. Otherwise: the width-specialised DSPr2 helpers for w in
 *      {4, 8, 16, 32, 64}; any other width goes to the C implementation.
 *
 * src/src_stride:   source pixels and row stride.
 * dst/dst_stride:   destination pixels (read and written) and row stride.
 * filter_x:         horizontal kernel; read here as 32-bit words, i.e. two
 *                   int16_t taps at a time.
 * x_step_q4:        horizontal step; 16 selects the DSPr2 paths.
 * filter_y/y_step_q4: not used here, only forwarded to the callees.
 * w, h:             block width and height in pixels.
 */
void vp9_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
  uint32_t pos = 38;

  /* Scaled case: none of the DSPr2 shortcuts below are valid. */
  if (16 != x_step_q4) {
    vp9_convolve8_avg_horiz_c(src, src_stride,
                              dst, dst_stride,
                              filter_x, x_step_q4,
                              filter_y, y_step_q4,
                              w, h);
    return;
  }

  /* Taps 2/3 word == 0x800000: handled as a straight average of src and dst.
   * NOTE(review): the 32-bit compare presumably assumes a little-endian
   * target (tap 2 == 0, tap 3 == 128) -- confirm. */
  if (((const int32_t *)filter_x)[1] == 0x800000) {
    vp9_convolve_avg(src, src_stride,
                     dst, dst_stride,
                     filter_x, x_step_q4,
                     filter_y, y_step_q4,
                     w, h);
    return;
  }

  /* Taps 0/1 are both zero: use the shorter-kernel DSPr2 variant. */
  if (((const int32_t *)filter_x)[0] == 0) {
    vp9_convolve2_avg_horiz_dspr2(src, src_stride,
                                  dst, dst_stride,
                                  filter_x, x_step_q4,
                                  filter_y, y_step_q4,
                                  w, h);
    return;
  }

  /* The DSPr2 helpers expect src to point 3 pixels left of the output
   * position (start of the 8-tap window). */
  src -= 3;

  /* bit position for extract from acc */
  __asm__ __volatile__ (
    "wrdsp      %[pos],     1           \n\t"
    :
    : [pos] "r" (pos)
  );

  /* prefetch data to cache memory */
  vp9_prefetch_load(src);
  vp9_prefetch_load(src + 32);
  vp9_prefetch_store(dst);

  switch (w) {
    case 4:
      convolve_avg_horiz_4_dspr2(src, src_stride,
                                 dst, dst_stride,
                                 filter_x, h);
      break;
    case 8:
      convolve_avg_horiz_8_dspr2(src, src_stride,
                                 dst, dst_stride,
                                 filter_x, h);
      break;
    case 16:
      /* last argument: number of 16-pixel groups per row */
      convolve_avg_horiz_16_dspr2(src, src_stride,
                                  dst, dst_stride,
                                  filter_x, h, 1);
      break;
    case 32:
      convolve_avg_horiz_16_dspr2(src, src_stride,
                                  dst, dst_stride,
                                  filter_x, h, 2);
      break;
    case 64:
      vp9_prefetch_load(src + 64);
      vp9_prefetch_store(dst + 32);

      convolve_avg_horiz_64_dspr2(src, src_stride,
                                  dst, dst_stride,
                                  filter_x, h);
      break;
    default:
      /* Unsupported width: undo the -3 adjustment for the C version. */
      vp9_convolve8_avg_horiz_c(src + 3, src_stride,
                                dst, dst_stride,
                                filter_x, x_step_q4,
                                filter_y, y_step_q4,
                                w, h);
      break;
  }
}
10385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif
1039