15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/* 25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * 45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Use of this source code is governed by a BSD-style license 55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * that can be found in the LICENSE file in the root of the source 65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * tree. An additional intellectual property rights grant can be found 75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * in the file PATENTS. All contributing project authors may 85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * be found in the AUTHORS file in the root of the source tree. 95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h> 125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h> 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_dsp_rtcd.h" 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/mips/convolve_common_dspr2.h" 16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/vpx_convolve.h" 17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/vpx_dsp_common.h" 185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx_ports/mem.h" 195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2 215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_bi_avg_horiz_4_dspr2(const uint8_t *src, 227bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 247bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_x0, int32_t h) { 255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y; 26da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *cm = vpx_ff_cropTbl; 277bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t Temp1, Temp2, Temp3, Temp4; 285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2; 305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3; 315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tn1, tn2; 325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter = &filter_x0[3]; 337bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t filter45; 345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter45 = ((const int32_t *)filter)[0]; 365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 39da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 40da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 437bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp1] \n\t" 515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp1] \n\t" 525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" 535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac3, 31 \n\t" 545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tp2], %[tp1], 3 \n\t" 595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" 605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 627bc9febe8749e98a3812a0dc4380ceae75c29450Johann "lbu %[p2], 3(%[dst]) \n\t" /* load odd 2 */ 635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 657bc9febe8749e98a3812a0dc4380ceae75c29450Johann "lbux %[tp1], %[Temp1](%[cm]) \n\t" /* even 1 */ 665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 687bc9febe8749e98a3812a0dc4380ceae75c29450Johann "lbu %[Temp1], 1(%[dst]) \n\t" /* load odd 1 */ 695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp2] \n\t" 705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[tp2] \n\t" 715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" 725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 747bc9febe8749e98a3812a0dc4380ceae75c29450Johann "lbu %[tn2], 0(%[dst]) \n\t" /* load even 1 */ 755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 777bc9febe8749e98a3812a0dc4380ceae75c29450Johann "lbux %[tp2], %[Temp3](%[cm]) \n\t" /* even 2 */ 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 807bc9febe8749e98a3812a0dc4380ceae75c29450Johann "lbux %[tn1], %[Temp2](%[cm]) \n\t" /* odd 1 */ 817bc9febe8749e98a3812a0dc4380ceae75c29450Johann "addqh_r.w %[tn2], %[tn2], %[tp1] \n\t" /* average even 1 */ 825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" 835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp4], $ac2, 31 \n\t" 845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 857bc9febe8749e98a3812a0dc4380ceae75c29450Johann "lbu %[tp1], 2(%[dst]) \n\t" /* load even 2 */ 867bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sb %[tn2], 0(%[dst]) \n\t" /* store even 1 */ 875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* clamp */ 897bc9febe8749e98a3812a0dc4380ceae75c29450Johann "addqh_r.w %[Temp1], %[Temp1], %[tn1] \n\t" /* average odd 1 */ 907bc9febe8749e98a3812a0dc4380ceae75c29450Johann "lbux %[p3], %[Temp4](%[cm]) \n\t" /* odd 2 */ 917bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sb %[Temp1], 1(%[dst]) \n\t" /* store odd 1 */ 925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 937bc9febe8749e98a3812a0dc4380ceae75c29450Johann "addqh_r.w %[tp1], %[tp1], %[tp2] \n\t" /* average even 2 */ 947bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sb %[tp1], 2(%[dst]) \n\t" /* store even 2 */ 955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 967bc9febe8749e98a3812a0dc4380ceae75c29450Johann "addqh_r.w %[p2], %[p2], %[p3] \n\t" /* average odd 2 */ 977bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sb %[p2], 3(%[dst]) \n\t" /* store odd 2 */ 985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 997bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), 1007bc9febe8749e98a3812a0dc4380ceae75c29450Johann [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), 1017bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), 1027bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp4] "=&r"(Temp4) 1037bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm), 1047bc9febe8749e98a3812a0dc4380ceae75c29450Johann [dst] "r"(dst), [src] "r"(src)); 1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 1075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 1115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_bi_avg_horiz_8_dspr2(const uint8_t *src, 1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst, 1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t dst_stride, 1157bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_x0, int32_t h) { 1165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y; 117da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *cm = vpx_ff_cropTbl; 1185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2, tp3, tp4; 1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, n1; 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st0, st1; 1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter = &filter_x0[3]; 1247bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t filter45; 1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter45 = ((const int32_t *)filter)[0]; 1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 130da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 132da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp1] \n\t" 1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp1] \n\t" 1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp2] \n\t" 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp2] \n\t" 1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 8(%[src]) \n\t" 1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" 1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac3, 31 \n\t" 1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp2], 0(%[dst]) \n\t" 1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp4], 2(%[dst]) \n\t" 1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" 1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp1](%[cm]) \n\t" 1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 1605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 1615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp3](%[cm]) \n\t" 1625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" 1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" 1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[Temp2], %[Temp2], %[st0] \n\t" 1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp4], %[tp4], %[st1] \n\t" 1675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 0(%[dst]) \n\t" 1685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp4], 2(%[dst]) \n\t" 1695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tp3], %[tp2], 3 \n\t" 1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tp2], %[tp1], 3 \n\t" 1785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp1](%[cm]) \n\t" 1805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[Temp2], 4(%[dst]) \n\t" 1815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[Temp2], %[Temp2], %[st0] \n\t" 1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter45] \n\t" 1845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 1855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 1875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 1885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 1895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 4(%[dst]) \n\t" 1905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp2] \n\t" 1915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp2] \n\t" 1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp3] \n\t" 1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp3] \n\t" 1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" 1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp1], 6(%[dst]) \n\t" 1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 2035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp3](%[cm]) \n\t" 2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" 2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac1, 31 \n\t" 2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp2], 1(%[dst]) \n\t" 2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp3], 3(%[dst]) \n\t" 2105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp1], %[tp1], %[st0] \n\t" 2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp2](%[cm]) \n\t" 2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" 2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp2], %[tp2], %[st1] \n\t" 2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp4], 5(%[dst]) \n\t" 2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp2], 1(%[dst]) \n\t" 2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp1], 6(%[dst]) \n\t" 2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter45] \n\t" 2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac2, 31 \n\t" 2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[tp1], 7(%[dst]) \n\t" 2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* clamp */ 2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p4], %[Temp3](%[cm]) \n\t" 2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp3], %[tp3], %[p4] \n\t" 2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p2], %[Temp2](%[cm]) \n\t" 2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp4], %[tp4], %[p2] \n\t" 2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p1], %[Temp1](%[cm]) \n\t" 2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[tp1], %[tp1], %[p1] \n\t" 2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* store bytes */ 2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp3], 3(%[dst]) \n\t" 2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp4], 5(%[dst]) \n\t" 2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp1], 7(%[dst]) \n\t" 2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2437bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), 2447bc9febe8749e98a3812a0dc4380ceae75c29450Johann [tp4] "=&r"(tp4), [st0] "=&r"(st0), [st1] "=&r"(st1), [p1] "=&r"(p1), 2457bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1), 2467bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) 2477bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm), 2487bc9febe8749e98a3812a0dc4380ceae75c29450Johann [dst] "r"(dst), [src] "r"(src)); 2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_bi_avg_horiz_16_dspr2(const uint8_t *src_ptr, 2577bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst_ptr, 2587bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t dst_stride, 2597bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_x0, int32_t h, 2607bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t count) { 2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y, c; 2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src; 2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst; 264da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *cm = vpx_ff_cropTbl; 2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector_64 = 64; 2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t qload1, qload2, qload3; 2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, p5; 2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st1, st2, st3; 2705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter = &filter_x0[3]; 2717bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t filter45; 2725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter45 = ((const int32_t *)filter)[0]; 2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 2765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src = src_ptr; 2775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = dst_ptr; 2785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 280da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride); 281da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride + 32); 282da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst_ptr + dst_stride); 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (c = 0; c < count; c++) { 2857bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 0(%[src]) \n\t" 2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 4(%[src]) \n\t" 2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ 2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ 2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 8(%[src]) \n\t" 2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */ 3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ 3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st2], 0(%[dst]) \n\t" /* load even 1 from dst */ 3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ 3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 12(%[src]) \n\t" 3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 1 */ 3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ 3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ 3125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 2(%[dst]) \n\t" /* load even 2 from dst */ 3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 3165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ 3175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 3185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 1 */ 3195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 3205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* store even 1 to dst */ 3215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */ 3225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ 3235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ 3245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 3265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ 3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st2] \n\t" /* average even 2 */ 3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 2(%[dst]) \n\t" /* store even 2 to dst */ 3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 4(%[dst]) \n\t" /* load even 3 from dst */ 3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 6(%[dst]) \n\t" /* load even 4 from dst */ 3335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */ 3345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ 3355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ 3365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 5. pixel */ 3385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ 3395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 3405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 3 */ 3415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 4(%[dst]) \n\t" /* store even 3 to dst */ 3425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */ 3435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ 3445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ 3455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 6. pixel */ 3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ 3485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 3495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average even 4 */ 3505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 6(%[dst]) \n\t" /* store even 4 to dst */ 3515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */ 3525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 8(%[dst]) \n\t" /* load even 5 from dst */ 3535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ 3545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ 3555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 7. pixel */ 3575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ 3585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 3595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 5 */ 3605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 8(%[dst]) \n\t" /* store even 5 to dst */ 3615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */ 3625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 10(%[dst]) \n\t" /* load even 6 from dst */ 3635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ 3645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ 3655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st2], 12(%[dst]) \n\t" /* load even 7 from dst */ 3675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 8. pixel */ 3695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ 3705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 3715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 6 */ 3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */ 3735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 10(%[dst]) \n\t" /* store even 6 to dst */ 3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ 3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ 3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* ODD pixels */ 3785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 1(%[src]) \n\t" 3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 5(%[src]) \n\t" 3805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 7 */ 3825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ 3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 3865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 3895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 3905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 12(%[dst]) \n\t" /* store even 7 to dst */ 3915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 9(%[src]) \n\t" 3925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */ 3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 14(%[dst]) \n\t" /* load even 8 from dst */ 3945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ 3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ 3965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st1], 1(%[dst]) \n\t" /* load odd 1 from dst */ 3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 4005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ 4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 8 */ 4035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 4045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 4055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 14(%[dst]) \n\t" /* store even 8 to dst */ 4065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 13(%[src]) \n\t" 4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */ 4085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 3(%[dst]) \n\t" /* load odd 2 from dst */ 4095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ 4105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ 4115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 4135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ 4145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st3], %[st3], %[st1] \n\t" /* average odd 1 */ 4165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 4175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */ 4185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 1(%[dst]) \n\t" /* store odd 1 to dst */ 4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ 4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ 4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 4235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ 4245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st1] \n\t" /* average odd 2 */ 4265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 4275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 3(%[dst]) \n\t" /* store odd 2 to dst */ 4285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 5(%[dst]) \n\t" /* load odd 3 from dst */ 4295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */ 4305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ 4315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ 4325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st1], 7(%[dst]) \n\t" /* load odd 4 from dst */ 4345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 5. pixel */ 4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ 4375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st2] \n\t" /* average odd 3 */ 4395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 5(%[dst]) \n\t" /* store odd 3 to dst */ 4405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */ 4415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ 4425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ 4435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 9(%[dst]) \n\t" /* load odd 5 from dst */ 4455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 6. pixel */ 4475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ 4485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st1], %[st1], %[st3] \n\t" /* average odd 4 */ 4505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 7(%[dst]) \n\t" /* store odd 4 to dst */ 4515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */ 4525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ 4535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ 4545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 7. pixel */ 4565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ 4575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 5 */ 4595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 9(%[dst]) \n\t" /* store odd 5 to dst */ 4605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 11(%[dst]) \n\t" /* load odd 6 from dst */ 4615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */ 4625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ 4635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 13(%[dst]) \n\t" /* load odd 7 from dst */ 4655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 8. pixel */ 4675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */ 4685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ 4695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 15(%[dst]) \n\t" /* load odd 8 from dst */ 4715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ 4735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average odd 6 */ 4745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ 4765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average odd 7 */ 4775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ 4795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 8 */ 4805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 11(%[dst]) \n\t" /* store odd 6 to dst */ 4825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 13(%[dst]) \n\t" /* store odd 7 to dst */ 4835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 15(%[dst]) \n\t" /* store odd 8 to dst */ 4845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4857bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [st1] "=&r"(st1), 4867bc9febe8749e98a3812a0dc4380ceae75c29450Johann [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), 4877bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p3] "=&r"(p3), [p4] "=&r"(p4), [qload3] "=&r"(qload3), 4887bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), 4897bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp3] "=&r"(Temp3) 4907bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm), 4917bc9febe8749e98a3812a0dc4380ceae75c29450Johann [dst] "r"(dst), [src] "r"(src)); 4925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += 16; 4945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 16; 4955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 4985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr += src_stride; 4995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr += dst_stride; 5005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 5015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 5025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_bi_avg_horiz_64_dspr2(const uint8_t *src_ptr, 5047bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t src_stride, uint8_t *dst_ptr, 5057bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t dst_stride, 5067bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_x0, 5077bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t h) { 5085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y, c; 5095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src; 5105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst; 511da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *cm = vpx_ff_cropTbl; 5125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector_64 = 64; 5135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 5145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t qload1, qload2, qload3; 5155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, p5; 5165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st1, st2, st3; 5175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter = &filter_x0[3]; 5187bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t filter45; 5195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter45 = ((const int32_t *)filter)[0]; 5215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 5235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src = src_ptr; 5245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = dst_ptr; 5255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 527da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride); 528da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride + 32); 529da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride + 64); 530da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst_ptr + dst_stride); 531da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst_ptr + dst_stride + 32); 5325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (c = 0; c < 4; c++) { 5347bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 5355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 0(%[src]) \n\t" 5365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 4(%[src]) \n\t" 5375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 5395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ 5405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ 5425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 5435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 5445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 5455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 5465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 5475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 8(%[src]) \n\t" 5485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */ 5495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ 5505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st2], 0(%[dst]) \n\t" /* load even 1 from dst */ 5515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 5535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ 5545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 5555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 5565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 5575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 12(%[src]) \n\t" 5585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 1 */ 5595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ 5605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ 5615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 2(%[dst]) \n\t" /* load even 2 from dst */ 5635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 5655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ 5665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 1 */ 5685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 5695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* store even 1 to dst */ 5705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */ 5715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ 5725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ 5735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 5755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ 5765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 5775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st2] \n\t" /* average even 2 */ 5785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 5795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 2(%[dst]) \n\t" /* store even 2 to dst */ 5805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 4(%[dst]) \n\t" /* load even 3 from dst */ 5815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 6(%[dst]) \n\t" /* load even 4 from dst */ 5825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */ 5835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ 5845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ 5855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 5. pixel */ 5875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ 5885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 5895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 3 */ 5905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 4(%[dst]) \n\t" /* store even 3 to dst */ 5915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */ 5925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ 5935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ 5945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 6. pixel */ 5965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ 5975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average even 4 */ 5995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 6(%[dst]) \n\t" /* store even 4 to dst */ 6005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */ 6015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 8(%[dst]) \n\t" /* load even 5 from dst */ 6025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ 6035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ 6045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 7. pixel */ 6065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ 6075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 6085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 5 */ 6095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 8(%[dst]) \n\t" /* store even 5 to dst */ 6105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */ 6115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 10(%[dst]) \n\t" /* load even 6 from dst */ 6125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ 6135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ 6145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st2], 12(%[dst]) \n\t" /* load even 7 from dst */ 6165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 8. pixel */ 6185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ 6195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 6205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 6 */ 6215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */ 6225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 10(%[dst]) \n\t" /* store even 6 to dst */ 6235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ 6245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ 6255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* ODD pixels */ 6275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 1(%[src]) \n\t" 6285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 5(%[src]) \n\t" 6295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 7 */ 6315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 6335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ 6345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 6355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 6365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 6375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 6385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 6395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 12(%[dst]) \n\t" /* store even 7 to dst */ 6405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 9(%[src]) \n\t" 6415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */ 6425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 14(%[dst]) \n\t" /* load even 8 from dst */ 6435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ 6445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ 6455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st1], 1(%[dst]) \n\t" /* load odd 1 from dst */ 6475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 6495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ 6505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 6515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 8 */ 6525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 6535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 6545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 14(%[dst]) \n\t" /* store even 8 to dst */ 6555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 13(%[src]) \n\t" 6565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */ 6575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 3(%[dst]) \n\t" /* load odd 2 from dst */ 6585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ 6595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ 6605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 6625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ 6635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 6645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st3], %[st3], %[st1] \n\t" /* average odd 1 */ 6655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 6665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */ 6675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 1(%[dst]) \n\t" /* store odd 1 to dst */ 6685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ 6695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ 6705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 6725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ 6735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 6745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st1] \n\t" /* average odd 2 */ 6755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 6765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 3(%[dst]) \n\t" /* store odd 2 to dst */ 6775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 5(%[dst]) \n\t" /* load odd 3 from dst */ 6785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */ 6795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ 6805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ 6815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[st1], 7(%[dst]) \n\t" /* load odd 4 from dst */ 6835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 5. pixel */ 6855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ 6865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 6875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st2] \n\t" /* average odd 3 */ 6885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 5(%[dst]) \n\t" /* store odd 3 to dst */ 6895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */ 6905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ 6915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ 6925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 9(%[dst]) \n\t" /* load odd 5 from dst */ 6945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 6. pixel */ 6965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ 6975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 6985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[st1], %[st1], %[st3] \n\t" /* average odd 4 */ 6995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 7(%[dst]) \n\t" /* store odd 4 to dst */ 7005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */ 7015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ 7025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ 7035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 7. pixel */ 7055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ 7065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 7075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 5 */ 7085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 9(%[dst]) \n\t" /* store odd 5 to dst */ 7095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload2], 11(%[dst]) \n\t" /* load odd 6 from dst */ 7105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */ 7115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ 7125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload3], 13(%[dst]) \n\t" /* load odd 7 from dst */ 7145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 8. pixel */ 7165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */ 7175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ 7185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[qload1], 15(%[dst]) \n\t" /* load odd 8 from dst */ 7205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ 7225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average odd 6 */ 7235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ 7255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average odd 7 */ 7265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ 7285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 8 */ 7295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload2], 11(%[dst]) \n\t" /* store odd 6 to dst */ 7315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload3], 13(%[dst]) \n\t" /* store odd 7 to dst */ 7325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[qload1], 15(%[dst]) \n\t" /* store odd 8 to dst */ 7335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7347bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [st1] "=&r"(st1), 7357bc9febe8749e98a3812a0dc4380ceae75c29450Johann [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), 7367bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p3] "=&r"(p3), [p4] "=&r"(p4), [qload3] "=&r"(qload3), 7377bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), 7387bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp3] "=&r"(Temp3) 7397bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm), 7407bc9febe8749e98a3812a0dc4380ceae75c29450Johann [dst] "r"(dst), [src] "r"(src)); 7415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += 16; 7435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 16; 7445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 7455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 7475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr += src_stride; 7485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr += dst_stride; 7495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 7505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 7515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 752da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, 7535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 754df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const InterpKernel *filter, int x0_q4, 755df37111358d02836cb29bbcb9c6e4c95dff90a16Johann int32_t x_step_q4, int y0_q4, int y_step_q4, 7565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int w, int h) { 757df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const int16_t *const filter_x = filter[x0_q4]; 758da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t pos = 38; 759da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 760da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(x_step_q4 == 16); 761da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 762da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* bit positon for extract from acc */ 7637bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__("wrdsp %[pos], 1 \n\t" 7647bc9febe8749e98a3812a0dc4380ceae75c29450Johann : 7657bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [pos] "r"(pos)); 766da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 767da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* prefetch data to cache memory */ 768da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src); 769da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + 32); 770da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst); 771da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 772da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian switch (w) { 773da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 4: 7747bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_bi_avg_horiz_4_dspr2(src, src_stride, dst, dst_stride, filter_x, 7757bc9febe8749e98a3812a0dc4380ceae75c29450Johann h); 776da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 777da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 8: 7787bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_bi_avg_horiz_8_dspr2(src, src_stride, dst, dst_stride, filter_x, 7797bc9febe8749e98a3812a0dc4380ceae75c29450Johann h); 780da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 781da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 16: 7827bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_bi_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride, filter_x, 7837bc9febe8749e98a3812a0dc4380ceae75c29450Johann h, 1); 784da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 785da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 32: 7867bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_bi_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride, filter_x, 7877bc9febe8749e98a3812a0dc4380ceae75c29450Johann h, 2); 788da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 789da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 64: 790da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + 64); 791da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + 32); 792da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 7937bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_bi_avg_horiz_64_dspr2(src, src_stride, dst, dst_stride, filter_x, 7947bc9febe8749e98a3812a0dc4380ceae75c29450Johann h); 795da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 796da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian default: 797df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, 798df37111358d02836cb29bbcb9c6e4c95dff90a16Johann x_step_q4, y0_q4, y_step_q4, w, h); 799da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 8005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 8015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 8025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif 803