15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/* 25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * 45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Use of this source code is governed by a BSD-style license 55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * that can be found in the LICENSE file in the root of the source 65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * tree. An additional intellectual property rights grant can be found 75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * in the file PATENTS. All contributing project authors may 85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * be found in the AUTHORS file in the root of the source tree. 95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h> 125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h> 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vpx_config.h" 155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vp9_rtcd.h" 165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_common.h" 175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx/vpx_integer.h" 185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx_ports/mem.h" 195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_convolve.h" 205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" 215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2 /* convolve_avg_horiz_4_dspr2: MIPS DSPr2 inline-asm kernel for a 4-pixel-wide block. For each of the h rows it applies the 8 taps in filter_x0 horizontally to src, maps each sum through the crop table cm, averages it with the byte already in dst and stores the result back to dst offsets 0..3; src and dst then advance by src_stride and dst_stride. Returns nothing. */ 235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_horiz_4_dspr2(const uint8_t *src, 245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, 
265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x0, 285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y; 305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t vector1b, vector2b, vector3b, vector4b; 325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3, Temp4; 335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; /* written into the accumulator (mtlo) before every filter sum; presumably the rounding bias of the filter scale - TODO confirm */ 345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2; 355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4; 365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t n1, n2, n3, n4; 375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tn1, tn2; 385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* The 8 int16_t taps are fetched as four 32-bit words (two taps per word), matching the paired-halfword operands of dpa.w.ph below. NOTE(review): the cast assumes filter_x0 is suitably aligned for int32_t access - confirm at the call sites. */ 395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_x0)[0]; 405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_x0)[1]; 415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_x0)[2]; 425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_x0)[3]; 435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 
525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" /* tp1, tp2 and (below) tn2 receive src bytes 0-3, 4-7 and 8-11 of the row */ 535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp1] \n\t" 585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp1] \n\t" 595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp2] \n\t" 605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp2] \n\t" 615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tn2], 8(%[src]) \n\t" 655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac3, 31 \n\t" /* NOTE(review): extp takes its bit position from DSP control state that this function never sets - presumably initialised by the caller; verify */ 675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. 
pixel */ 695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tn2] \n\t" 725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn1], %[tn2], 3 \n\t" 735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn2], %[tp2], 3 \n\t" 745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tp2], %[tp1], 3 \n\t" /* the three balign ops form byte-shifted copies of the loaded words; tp2, tn2 and tn1 feed the odd-pixel sums below (byte-order dependent - NOTE(review): confirm against the DSP ASE balign definition) */ 755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[p2], 3(%[dst]) \n\t" /* load odd 2 */ 825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. 
pixel */ 845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp1], %[Temp1](%[cm]) \n\t" /* even 1 */ 855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp1], 1(%[dst]) \n\t" /* load odd 1 */ 885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[n1], %[tp2] \n\t" 895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n2], %[tp2] \n\t" 905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[n3], %[tn2] \n\t" 915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n4], %[tn2] \n\t" 925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" 955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n4], %[vector4b] \n\t" 965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tn2], 0(%[dst]) \n\t" /* load even 1 */ 995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. 
pixel */ 1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp2], %[Temp3](%[cm]) \n\t" /* even 2 */ 1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[n1], %[tn1] \n\t" 1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tn1], %[Temp2](%[cm]) \n\t" /* odd 1 */ 1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tn2], %[tn2], %[tp1] \n\t" /* average even 1 */ 1075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" 1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n1], %[vector4b] \n\t" 1115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp4], $ac2, 31 \n\t" 1125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp1], 2(%[dst]) \n\t" /* load even 2 */ 1145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tn2], 0(%[dst]) \n\t" /* store even 1 */ 1155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* clamp */ 1175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[Temp1], %[Temp1], %[tn1] \n\t" /* average odd 1 */ 1185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[n2], %[Temp4](%[cm]) \n\t" /* odd 2 */ 1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp1], 1(%[dst]) \n\t" /* store odd 1 */ 1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp1], %[tp1], %[tp2] \n\t" /* average even 2 */ 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp1], 2(%[dst]) \n\t" /* store even 2 */ 1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 
1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[p2], %[p2], %[n2] \n\t" /* average odd 2 */ 1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[p2], 3(%[dst]) \n\t" /* store odd 2 */ 1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* NOTE(review): the asm stores through dst (sb) and uses accumulators $ac2 and $ac3, but the statement declares no clobber list (neither a memory clobber nor the accumulator registers) - confirm the compiler cannot reorder or cache memory accesses around this block */ 1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 1285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tn1] "=&r" (tn1), [tn2] "=&r" (tn2), 1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4), 1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4), 1315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 1325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) 1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector3b] "r" (vector3b), [vector4b] "r" (vector4b), 1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector4a] "r" (vector4a), 1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src) 1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... 
*/ 1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_horiz_8_dspr2(const uint8_t *src, 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, 1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x0, 1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y; 1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t vector1b, vector2b, vector3b, vector4b; 1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2; 1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, n1; 1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tn1, tn2, tn3; 1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st0, st1; 1605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_x0)[0]; 1625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_x0)[1]; 1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_x0)[2]; 1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_x0)[3]; 1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 
1675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Per-row body of convolve_avg_horiz_8_dspr2: loads src bytes 0-15 of the row, computes 8 filtered pixels with the 8 taps held in vector1b..vector4b, maps each sum through the crop table cm (lbux), averages it with the byte already in dst (addqh_r.w) and stores it to dst offsets 0..7. The four even outputs are computed first, then the four odd outputs from byte-shifted copies of the source words. */ /* prefetch data to cache memory */ 1685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 1695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 1785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 1795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 1805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 1815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp1] \n\t" 1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp1] \n\t" 1835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp2] \n\t" 1845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp2] \n\t" 1855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tn2], 8(%[src]) \n\t" 1865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 1875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 1885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 1895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 1905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac3, 31 \n\t" 1915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp2], 
0(%[dst]) \n\t" 1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tn3], 2(%[dst]) \n\t" 1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tn2] \n\t" 1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n1], %[tn2] \n\t" 1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tn1], 12(%[src]) \n\t" 1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 2035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. 
pixel */ 2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp1](%[cm]) \n\t" 2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[tn1] \n\t" 2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp3](%[cm]) \n\t" 2105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[vector1b] \n\t" 2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[vector2b] \n\t" 2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[vector3b] \n\t" 2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector4b] \n\t" 2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" 2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[Temp2], %[Temp2], %[st0] \n\t" 2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tn3], %[tn3], %[st1] \n\t" 2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 0(%[dst]) \n\t" 2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tn3], 2(%[dst]) \n\t" 2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. 
pixel */ 2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn3], %[tn1], 3 \n\t" 2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn1], %[tn2], 3 \n\t" 2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn2], %[tp2], 3 \n\t" 2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tp2], %[tp1], 3 \n\t" /* the four balign ops form byte-shifted copies of the source words; tp2, tn2, tn1 and tn3 feed the odd-pixel sums below (byte-order dependent - NOTE(review): confirm against the DSP ASE balign definition) */ 2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp1](%[cm]) \n\t" 2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp2], 4(%[dst]) \n\t" 2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[Temp2], %[Temp2], %[st0] \n\t" 2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" 2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" 2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. 
pixel */ 2435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" /* $ac1 is seeded here but accumulates the odd 2 sum further down; the odd 1 sum itself accumulates in $ac3, which was seeded in the even 4 section */ 2445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 2455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 4(%[dst]) \n\t" 2465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp2] \n\t" 2475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp2] \n\t" 2485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tn2] \n\t" 2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tn2] \n\t" 2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp1], 6(%[dst]) \n\t" 2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. 
pixel */ 2595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 2605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tn1] \n\t" 2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n1], %[tn1] \n\t" 2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp3](%[cm]) \n\t" 2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[vector1b] \n\t" 2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[vector2b] \n\t" 2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[vector3b] \n\t" 2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[vector4b] \n\t" 2705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac1, 31 \n\t" 2715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp2], 1(%[dst]) \n\t" 2735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tn2], 3(%[dst]) \n\t" 2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp1], %[tp1], %[st0] \n\t" 2755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. 
pixel */ 2775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp2](%[cm]) \n\t" 2785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[tn3] \n\t" 2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" 2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" 2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" 2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector4b] \n\t" 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp2], %[tp2], %[st1] \n\t" 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tn3], 5(%[dst]) \n\t" 2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp2], 1(%[dst]) \n\t" 2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp1], 6(%[dst]) \n\t" 2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" 2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" 2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac2, 31 \n\t" 2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tn1], 7(%[dst]) \n\t" 2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* clamp */ 3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p4], %[Temp3](%[cm]) \n\t" 3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tn2], %[tn2], %[p4] \n\t" 
3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p2], %[Temp2](%[cm]) \n\t" 3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tn3], %[tn3], %[p2] \n\t" 3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[n1], %[Temp1](%[cm]) \n\t" 3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tn1], %[tn1], %[n1] \n\t" 3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* store bytes */ 3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tn2], 3(%[dst]) \n\t" 3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tn3], 5(%[dst]) \n\t" 3125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tn1], 7(%[dst]) \n\t" 3135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* NOTE(review): the asm stores through dst (sb) and uses accumulators $ac1, $ac2 and $ac3, but the statement declares no clobber list (neither a memory clobber nor the accumulator registers) - confirm the compiler cannot reorder or cache memory accesses around this block */ 3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tn1] "=&r" (tn1), [tn2] "=&r" (tn2), [tn3] "=&r" (tn3), 3165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [st0] "=&r" (st0), [st1] "=&r" (st1), 3175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4), 3185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [n1] "=&r" (n1), 3195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3) 3205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 3215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector3b] "r" (vector3b), [vector4b] "r" (vector4b), 3225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector4a] "r" (vector4a), 3235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src) 3245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 3255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... 
*/ 3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_horiz_16_dspr2(const uint8_t *src_ptr, 3335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 3345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst_ptr, 3355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 3365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x0, 3375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h, 3385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t count) { 3395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y, c; 3405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src; 3415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst; 3425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 3435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector_64 = 64; 3445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t filter12, filter34, filter56, filter78; 3455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 3465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t qload1, qload2, qload3; 3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, p5; 3485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st1, st2, st3; 3495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter12 = ((const int32_t *)filter_x0)[0]; 3515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter34 = ((const int32_t *)filter_x0)[1]; 3525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter56 = ((const int32_t *)filter_x0)[2]; 3535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter78 = ((const int32_t 
*)filter_x0)[3]; 3545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 3565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src = src_ptr; 3575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = dst_ptr; 3585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 3605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride); 3615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride + 32); 3625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst_ptr + dst_stride); 3635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (c = 0; c < count; c++) { 3655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 3665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 0(%[src]) \n\t" 3675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 4(%[src]) \n\t" 3685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. 
pixel */ 3705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ 3715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ 3735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 3785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 8(%[src]) \n\t" 3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ 3805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ 3815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ 3825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ 3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ 3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st2], 0(%[dst]) \n\t" /* load even 1 from dst */ 3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. 
pixel */ 3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ 3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 3895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 3905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 3915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 12(%[src]) \n\t" 3925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ 3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ 3945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ 3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ 3965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ 3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ 3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 2(%[dst]) \n\t" /* load even 2 from dst */ 4005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. 
pixel */ 4025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ 4035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 1 */ 4055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 4065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* store even 1 to dst */ 4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ 4085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ 4095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ 4105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ 4115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ 4125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ 4135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. 
pixel */ 4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ 4165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st2] \n\t" /* average even 2 */ 4185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 2(%[dst]) \n\t" /* store even 2 to dst */ 4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 16(%[src]) \n\t" 4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 4(%[dst]) \n\t" /* load even 3 from dst */ 4225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 6(%[dst]) \n\t" /* load even 4 from dst */ 4235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ 4245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ 4255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ 4265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ 4275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ 4285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ 4295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 5. 
pixel */ 4315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ 4325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 3 */ 4345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 4355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 4(%[dst]) \n\t" /* store even 3 to dst */ 4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ 4375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ 4385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ 4395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ 4405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ 4415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ 4425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 6. 
pixel */ 4445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ 4455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average even 4 */ 4475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 4485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 6(%[dst]) \n\t" /* store even 4 to dst */ 4495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 20(%[src]) \n\t" 4505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ 4515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ 4525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ 4535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ 4545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 8(%[dst]) \n\t" /* load even 5 from dst */ 4555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ 4565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ 4575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 7. 
pixel */ 4595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ 4605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 5 */ 4625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload3] \n\t" 4635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 8(%[dst]) \n\t" /* store even 5 to dst */ 4645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ 4655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ 4665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ 4675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ 4685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 10(%[dst]) \n\t" /* load even 6 from dst */ 4695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ 4705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ 4715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st2], 12(%[dst]) \n\t" /* load even 7 from dst */ 4735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 8. 
pixel */ 4755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ 4765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 6 */ 4785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ 4795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ 4805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 10(%[dst]) \n\t" /* store even 6 to dst */ 4815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ 4825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ 4835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ 4845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ 4855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* ODD pixels */ 4875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 1(%[src]) \n\t" 4885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 5(%[src]) \n\t" 4895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 7 */ 4915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. 
pixel */ 4935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ 4945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 4965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 4975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 4985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 4995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 12(%[dst]) \n\t" /* store even 7 to dst */ 5005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 9(%[src]) \n\t" 5015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ 5025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ 5035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 14(%[dst]) \n\t" /* load even 8 from dst */ 5045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ 5055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ 5065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ 5075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ 5085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st1], 1(%[dst]) \n\t" /* load odd 1 from dst */ 5105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. 
pixel */ 5125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ 5135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 5145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 8 */ 5155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 5165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 5175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 14(%[dst]) \n\t" /* store even 8 to dst */ 5185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 13(%[src]) \n\t" 5195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ 5205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ 5215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ 5225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ 5235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 3(%[dst]) \n\t" /* load odd 2 from dst */ 5245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ 5255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ 5265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. 
pixel */ 5285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ 5295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 5305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st3], %[st3], %[st1] \n\t" /* average odd 1 */ 5315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 5325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ 5335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ 5345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ 5355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ 5365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 1(%[dst]) \n\t" /* store odd 1 to dst */ 5375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ 5385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ 5395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. 
pixel */ 5415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ 5425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st1] \n\t" /* average odd 2 */ 5445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 5455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 3(%[dst]) \n\t" /* store odd 2 to dst */ 5465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 5(%[dst]) \n\t" /* load odd 3 from dst */ 5475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 17(%[src]) \n\t" 5485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ 5495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ 5505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ 5515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ 5525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ 5535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ 5545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st1], 7(%[dst]) \n\t" /* load odd 4 from dst */ 5565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 5. 
pixel */ 5585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ 5595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 5605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st2] \n\t" /* average odd 3 */ 5615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 5625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 5(%[dst]) \n\t" /* store odd 3 to dst */ 5635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ 5645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ 5655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ 5665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ 5675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ 5685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ 5695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 9(%[dst]) \n\t" /* load odd 5 from dst */ 5715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 6. 
pixel */ 5735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ 5745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 5755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st1], %[st1], %[st3] \n\t" /* average odd 4 */ 5765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 5775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 7(%[dst]) \n\t" /* store odd 4 to dst */ 5785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 21(%[src]) \n\t" 5795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ 5805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ 5815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ 5825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ 5835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ 5845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ 5855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 7. 
pixel */ 5875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ 5885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 5 */ 5905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload3] \n\t" 5915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 9(%[dst]) \n\t" /* store odd 5 to dst */ 5925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 11(%[dst]) \n\t" /* load odd 6 from dst */ 5935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ 5945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ 5955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ 5965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ 5975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ 5985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 13(%[dst]) \n\t" /* load odd 7 from dst */ 6005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 8. 
pixel */ 6025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ 6035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ 6045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ 6055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ 6065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ 6075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 15(%[dst]) \n\t" /* load odd 8 from dst */ 6095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ 6115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average odd 6 */ 6125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ 6145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average odd 7 */ 6155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ 6175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 8 */ 6185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 11(%[dst]) \n\t" /* store odd 6 to dst */ 6205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 13(%[dst]) \n\t" /* store odd 7 to dst */ 6215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 15(%[dst]) \n\t" /* store odd 8 to dst */ 6225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [qload1] "=&r" (qload1), [qload2] "=&r" (qload2), 
6245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [st1] "=&r" (st1), [st2] "=&r" (st2), [st3] "=&r" (st3), 6255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4), 6265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [qload3] "=&r" (qload3), [p5] "=&r" (p5), 6275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3) 6285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [filter12] "r" (filter12), [filter34] "r" (filter34), 6295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [filter56] "r" (filter56), [filter78] "r" (filter78), 6305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector_64] "r" (vector_64), 6315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src) 6325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 6335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += 16; 6355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 16; 6365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... 
*/ 6395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr += src_stride; 6405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr += dst_stride; 6415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 6435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_horiz_64_dspr2(const uint8_t *src_ptr, 6455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 6465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst_ptr, 6475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 6485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x0, 6495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 6505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y, c; 6515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src; 6525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst; 6535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 6545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector_64 = 64; 6555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t filter12, filter34, filter56, filter78; 6565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 6575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t qload1, qload2, qload3; 6585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, p5; 6595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st1, st2, st3; 6605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter12 = ((const int32_t *)filter_x0)[0]; 6625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter34 = ((const int32_t *)filter_x0)[1]; 6635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter56 = ((const int32_t *)filter_x0)[2]; 6645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter78 = ((const int32_t *)filter_x0)[3]; 
6655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 6675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src = src_ptr; 6685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = dst_ptr; 6695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 6715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride); 6725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride + 32); 6735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride + 64); 6745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst_ptr + dst_stride); 6755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst_ptr + dst_stride + 32); 6765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (c = 0; c < 4; c++) { 6785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 6795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 0(%[src]) \n\t" 6805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 4(%[src]) \n\t" 6815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. 
pixel */ 6835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ 6845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 6855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ 6865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 6875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 6885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 6895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 6905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 6915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 8(%[src]) \n\t" 6925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ 6935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ 6945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ 6955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ 6965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ 6975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st2], 0(%[dst]) \n\t" /* load even 1 from dst */ 6985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. 
pixel */ 7005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ 7015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 7025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 7035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 7045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 12(%[src]) \n\t" 7055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ 7065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ 7075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ 7085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ 7095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ 7105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ 7115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 2(%[dst]) \n\t" /* load even 2 from dst */ 7135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. 
pixel */ 7155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ 7165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 7175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 1 */ 7185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 7195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* store even 1 to dst */ 7205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ 7215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ 7225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ 7235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ 7245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ 7255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ 7265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. 
pixel */ 7285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ 7295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 7305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st2] \n\t" /* average even 2 */ 7315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 7325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 2(%[dst]) \n\t" /* store even 2 to dst */ 7335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 16(%[src]) \n\t" 7345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 4(%[dst]) \n\t" /* load even 3 from dst */ 7355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 6(%[dst]) \n\t" /* load even 4 from dst */ 7365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ 7375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ 7385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ 7395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ 7405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ 7415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ 7425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 5. 
pixel */ 7445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ 7455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 7465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 3 */ 7475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 7485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 4(%[dst]) \n\t" /* store even 3 to dst */ 7495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ 7505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ 7515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ 7525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ 7535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ 7545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ 7555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 6. 
pixel */ 7575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ 7585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 7595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average even 4 */ 7605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 7615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 6(%[dst]) \n\t" /* store even 4 to dst */ 7625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 20(%[src]) \n\t" 7635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ 7645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ 7655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ 7665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ 7675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 8(%[dst]) \n\t" /* load even 5 from dst */ 7685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ 7695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ 7705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 7. 
pixel */ 7725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ 7735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 7745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 5 */ 7755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload3] \n\t" 7765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 8(%[dst]) \n\t" /* store even 5 to dst */ 7775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ 7785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ 7795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ 7805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ 7815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 10(%[dst]) \n\t" /* load even 6 from dst */ 7825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ 7835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ 7845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st2], 12(%[dst]) \n\t" /* load even 7 from dst */ 7865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 8. 
pixel */ 7885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ 7895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 7905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 6 */ 7915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ 7925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ 7935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 10(%[dst]) \n\t" /* store even 6 to dst */ 7945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ 7955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ 7965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ 7975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ 7985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* ODD pixels */ 8005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 1(%[src]) \n\t" 8015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 5(%[src]) \n\t" 8025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 7 */ 8045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. 
pixel */ 8065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ 8075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 8085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 8095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 8105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 8115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 8125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 12(%[dst]) \n\t" /* store even 7 to dst */ 8135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 9(%[src]) \n\t" 8145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ 8155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ 8165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 14(%[dst]) \n\t" /* load even 8 from dst */ 8175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ 8185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ 8195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ 8205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ 8215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st1], 1(%[dst]) \n\t" /* load odd 1 from dst */ 8235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. 
pixel */ 8255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ 8265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 8275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 8 */ 8285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 8295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 8305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 14(%[dst]) \n\t" /* store even 8 to dst */ 8315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 13(%[src]) \n\t" 8325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ 8335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ 8345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ 8355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ 8365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 3(%[dst]) \n\t" /* load odd 2 from dst */ 8375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ 8385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ 8395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. 
pixel */ 8415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ 8425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 8435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st3], %[st3], %[st1] \n\t" /* average odd 1 */ 8445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 8455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ 8465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ 8475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ 8485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ 8495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 1(%[dst]) \n\t" /* store odd 1 to dst */ 8505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ 8515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ 8525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. 
pixel */ 8545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ 8555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 8565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st1] \n\t" /* average odd 2 */ 8575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 8585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 3(%[dst]) \n\t" /* store odd 2 to dst */ 8595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 5(%[dst]) \n\t" /* load odd 3 from dst */ 8605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 17(%[src]) \n\t" 8615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ 8625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ 8635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ 8645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ 8655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ 8665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ 8675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st1], 7(%[dst]) \n\t" /* load odd 4 from dst */ 8695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 5. 
pixel */ 8715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ 8725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 8735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st2] \n\t" /* average odd 3 */ 8745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 8755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 5(%[dst]) \n\t" /* store odd 3 to dst */ 8765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ 8775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ 8785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ 8795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ 8805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ 8815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ 8825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 9(%[dst]) \n\t" /* load odd 5 from dst */ 8845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 6. 
pixel */ 8865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ 8875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 8885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st1], %[st1], %[st3] \n\t" /* average odd 4 */ 8895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 8905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 7(%[dst]) \n\t" /* store odd 4 to dst */ 8915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 21(%[src]) \n\t" 8925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ 8935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ 8945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ 8955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ 8965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ 8975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ 8985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 7. 
pixel */ 9005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ 9015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 9025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 5 */ 9035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload3] \n\t" 9045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 9(%[dst]) \n\t" /* store odd 5 to dst */ 9055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 11(%[dst]) \n\t" /* load odd 6 from dst */ 9065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ 9075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ 9085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ 9095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ 9105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ 9115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 13(%[dst]) \n\t" /* load odd 7 from dst */ 9135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 8. 
pixel */ 9155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ 9165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ 9175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ 9185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ 9195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ 9205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 15(%[dst]) \n\t" /* load odd 8 from dst */ 9225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ 9245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average odd 6 */ 9255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ 9275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average odd 7 */ 9285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ 9305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 8 */ 9315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 11(%[dst]) \n\t" /* store odd 6 to dst */ 9335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 13(%[dst]) \n\t" /* store odd 7 to dst */ 9345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 15(%[dst]) \n\t" /* store odd 8 to dst */ 9355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [qload1] "=&r" (qload1), [qload2] "=&r" (qload2), 
9375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [st1] "=&r" (st1), [st2] "=&r" (st2), [st3] "=&r" (st3), 9385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4), 9395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [qload3] "=&r" (qload3), [p5] "=&r" (p5), 9405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3) 9415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [filter12] "r" (filter12), [filter34] "r" (filter34), 9425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [filter56] "r" (filter56), [filter78] "r" (filter78), 9435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector_64] "r" (vector_64), 9445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cm] "r" (cm), [dst] "r" (dst), [src] "r" (src) 9455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 9465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += 16; 9485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 16; 9495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 9505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... 
*/ 9525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr += src_stride; 9535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr += dst_stride; 9545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 9555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 9565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, 9585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 9595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x, int x_step_q4, 9605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_y, int y_step_q4, 9615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int w, int h) { 9625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (((const int32_t *)filter_x)[1] == 0x800000) { 9635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve_avg(src, src_stride, 9645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 9655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 9665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 9675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 9685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else if (((const int32_t *)filter_x)[0] == 0) { 9695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve2_avg_horiz_dspr2(src, src_stride, 9705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 9715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 9725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 9735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 9745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 9755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (16 == x_step_q4) { 9765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t pos = 38; 9775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src -= 3; 
9795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* bit positon for extract from acc */ 9815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 9825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "wrdsp %[pos], 1 \n\t" 9835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : 9845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [pos] "r" (pos) 9855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 9865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 9885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src); 9895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + 32); 9905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst); 9915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang switch (w) { 9935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 4: 9945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_avg_horiz_4_dspr2(src, src_stride, 9955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 9965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, h); 9975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 9985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 8: 9995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_avg_horiz_8_dspr2(src, src_stride, 10005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, h); 10025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 16: 10045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_avg_horiz_16_dspr2(src, src_stride, 10055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, h, 1); 10075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 
10085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 32: 10095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_avg_horiz_16_dspr2(src, src_stride, 10105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, h, 2); 10125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 64: 10145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + 64); 10155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + 32); 10165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 10175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_avg_horiz_64_dspr2(src, src_stride, 10185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, h); 10205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang default: 10225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve8_avg_horiz_c(src + 3, src_stride, 10235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 10255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 10265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 10275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 10295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 10305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve8_avg_horiz_c(src, src_stride, 10315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 10335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 10345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 10355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 10365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 
10375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 10385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif 1039