15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/* 25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * 45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Use of this source code is governed by a BSD-style license 55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * that can be found in the LICENSE file in the root of the source 65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * tree. An additional intellectual property rights grant can be found 75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * in the file PATENTS. All contributing project authors may 85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * be found in the AUTHORS file in the root of the source tree. 95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h> 125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h> 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_dsp_rtcd.h" 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/mips/convolve_common_dspr2.h" 16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/vpx_dsp_common.h" 17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/vpx_filter.h" 185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx_ports/mem.h" 195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2 217bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void convolve_horiz_4_dspr2(const uint8_t *src, int32_t src_stride, 227bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *dst, int32_t dst_stride, 237bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_x0, int32_t h) { 245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y; 25da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *cm = vpx_ff_cropTbl; 265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t vector1b, vector2b, vector3b, vector4b; 275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3, Temp4; 285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2; 305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4; 315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t n1, n2, n3, n4; 325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tn1, tn2; 335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_x0)[0]; 355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_x0)[1]; 365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_x0)[2]; 375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_x0)[3]; 385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 41da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 42da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 43da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 457bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp1] \n\t" 535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp1] \n\t" 545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp2] \n\t" 555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp2] \n\t" 565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tn2], 8(%[src]) \n\t" 605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac3, 31 \n\t" 625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tn2] \n\t" 675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn1], %[tn2], 3 \n\t" 685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn2], %[tp2], 3 \n\t" 695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tp2], %[tp1], 3 \n\t" 705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" 745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp1], %[Temp1](%[cm]) \n\t" 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[n1], %[tp2] \n\t" 815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n2], %[tp2] \n\t" 825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[n3], %[tn2] \n\t" 835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n4], %[tn2] \n\t" 845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" 875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n4], %[vector4b] \n\t" 885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp2], %[Temp3](%[cm]) \n\t" 925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[n1], %[tn1] \n\t" 955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" 985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n1], %[vector4b] \n\t" 995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp4], $ac2, 31 \n\t" 1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* clamp */ 1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tn1], %[Temp2](%[cm]) \n\t" 1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[n2], %[Temp4](%[cm]) \n\t" 1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* store bytes */ 1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp1], 0(%[dst]) \n\t" 1075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tn1], 1(%[dst]) \n\t" 1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp2], 2(%[dst]) \n\t" 1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[n2], 3(%[dst]) \n\t" 1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1117bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), 1127bc9febe8749e98a3812a0dc4380ceae75c29450Johann [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), 1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p4] "=&r"(p4), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), 1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), 1157bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) 1167bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), 1177bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), 1187bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), 1197bc9febe8749e98a3812a0dc4380ceae75c29450Johann [src] "r"(src)); 1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1277bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void convolve_horiz_8_dspr2(const uint8_t *src, int32_t src_stride, 1287bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *dst, int32_t dst_stride, 1297bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_x0, int32_t h) { 1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y; 131da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *cm = vpx_ff_cropTbl; 1325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t vector1b, vector2b, vector3b, vector4b; 1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2; 1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, n1; 1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tn1, tn2, tn3; 1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st0, st1; 1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_x0)[0]; 1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_x0)[1]; 1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_x0)[2]; 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_x0)[3]; 1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 147da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 148da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 149da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1517bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 1605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp1] \n\t" 1615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp1] \n\t" 1625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp2] \n\t" 1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp2] \n\t" 1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tn2], 8(%[src]) \n\t" 1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 1675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 1685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 1695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac3, 31 \n\t" 1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tn2] \n\t" 1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n1], %[tn2] \n\t" 1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tn1], 12(%[src]) \n\t" 1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 1785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" 1795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 1805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp1](%[cm]) \n\t" 1835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 1845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 1855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[tn1] \n\t" 1865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[vector1b] \n\t" 1875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[vector2b] \n\t" 1885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[vector3b] \n\t" 1895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector4b] \n\t" 1905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" 1915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st0], 0(%[dst]) \n\t" 1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp3](%[cm]) \n\t" 1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn3], %[tn1], 3 \n\t" 2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn1], %[tn2], 3 \n\t" 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn2], %[tp2], 3 \n\t" 2035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tp2], %[tp1], 3 \n\t" 2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" 2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" 2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 2105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp1](%[cm]) \n\t" 2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 2(%[dst]) \n\t" 2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp2] \n\t" 2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp2] \n\t" 2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tn2] \n\t" 2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tn2] \n\t" 2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st0], 4(%[dst]) \n\t" 2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tn1] \n\t" 2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n1], %[tn1] \n\t" 2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st0], %[Temp3](%[cm]) \n\t" 2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[vector1b] \n\t" 2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[vector2b] \n\t" 2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[vector3b] \n\t" 2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[vector4b] \n\t" 2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac1, 31 \n\t" 2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 2435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp2](%[cm]) \n\t" 2445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[tn3] \n\t" 2455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" 2465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" 2475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" 2485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector4b] \n\t" 2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 1(%[dst]) \n\t" 2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st0], 6(%[dst]) \n\t" 2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" 2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" 2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 2585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac2, 31 \n\t" 2595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* clamp */ 2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p4], %[Temp3](%[cm]) \n\t" 2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p2], %[Temp2](%[cm]) \n\t" 2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[n1], %[Temp1](%[cm]) \n\t" 2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* store bytes */ 2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[p4], 3(%[dst]) \n\t" 2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[p2], 5(%[dst]) \n\t" 2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[n1], 7(%[dst]) \n\t" 2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2707bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), 2717bc9febe8749e98a3812a0dc4380ceae75c29450Johann [tn2] "=&r"(tn2), [tn3] "=&r"(tn3), [st0] "=&r"(st0), 2727bc9febe8749e98a3812a0dc4380ceae75c29450Johann [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), 2737bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p4] "=&r"(p4), [n1] "=&r"(n1), [Temp1] "=&r"(Temp1), 2747bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) 2757bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), 2767bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), 2777bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), 2787bc9febe8749e98a3812a0dc4380ceae75c29450Johann [src] "r"(src)); 2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2867bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void convolve_horiz_16_dspr2(const uint8_t *src_ptr, int32_t src_stride, 2877bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *dst_ptr, int32_t dst_stride, 2887bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_x0, int32_t h, 2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t count) { 2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y, c; 2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src; 2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst; 293da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *cm = vpx_ff_cropTbl; 2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector_64 = 64; 2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t filter12, filter34, filter56, filter78; 2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t qload1, qload2, qload3; 2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, p5; 2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st1, st2, st3; 3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter12 = ((const int32_t *)filter_x0)[0]; 3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter34 = ((const int32_t *)filter_x0)[1]; 3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter56 = ((const int32_t *)filter_x0)[2]; 3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter78 = ((const int32_t *)filter_x0)[3]; 3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src = src_ptr; 3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = dst_ptr; 3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 311da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride); 312da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride + 32); 313da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst_ptr + dst_stride); 3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (c = 0; c < count; c++) { 3167bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 3175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 0(%[src]) \n\t" 3185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 4(%[src]) \n\t" 3195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 3215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ 3225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 3235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ 3245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 3255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 3265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 8(%[src]) \n\t" 3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ 3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ 3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ 3335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ 3345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ 3355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 3375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ 3385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 3395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 3405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 3415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 12(%[src]) \n\t" 3425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ 3435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ 3445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ 3455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ 3465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ 3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ 3485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 3505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ 3515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 3525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 3535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[dst]) \n\t" /* even 1 */ 3545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ 3555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ 3565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ 3575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ 3585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ 3595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ 3605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 3625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ 3635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 3645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 3655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 2(%[dst]) \n\t" /* even 1 */ 3665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 16(%[src]) \n\t" 3675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ 3685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ 3695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ 3705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ 3715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ 3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ 3735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 5. pixel */ 3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ 3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 3785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 4(%[dst]) \n\t" /* even 3 */ 3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ 3805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ 3815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ 3825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ 3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ 3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ 3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 6. pixel */ 3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ 3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 3895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 3905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 6(%[dst]) \n\t" /* even 4 */ 3915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 20(%[src]) \n\t" 3925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ 3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ 3945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ 3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ 3965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ 3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ 3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 7. pixel */ 4005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ 4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload3] \n\t" 4035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 8(%[dst]) \n\t" /* even 5 */ 4045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ 4055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ 4065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ 4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ 4085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ 4095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ 4105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 8. pixel */ 4125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ 4135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ 4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ 4165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 10(%[dst]) \n\t" /* even 6 */ 4175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ 4185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ 4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ 4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ 4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* ODD pixels */ 4235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 1(%[src]) \n\t" 4245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 5(%[src]) \n\t" 4255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 4275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ 4285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 4305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 4315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 4325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 4335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 12(%[dst]) \n\t" /* even 7 */ 4345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 9(%[src]) \n\t" 4355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ 4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ 4375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ 4385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ 4395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ 4405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ 4415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 4435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ 4445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 4465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 4475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 14(%[dst]) \n\t" /* even 8 */ 4485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 13(%[src]) \n\t" 4495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ 4505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ 4515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ 4525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ 4535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ 4545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ 4555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 4575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ 4585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 4605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */ 4615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ 4625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ 4635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ 4645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ 4655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ 4665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ 4675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 4695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ 4705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 4725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */ 4735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 17(%[src]) \n\t" 4745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ 4755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ 4765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ 4775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ 4785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ 4795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ 4805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 5. pixel */ 4825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ 4835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 4855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */ 4865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ 4875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ 4885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ 4895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ 4905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ 4915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ 4925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 6. pixel */ 4945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ 4955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 4975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */ 4985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 21(%[src]) \n\t" 4995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ 5005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ 5015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ 5025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ 5035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ 5045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ 5055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 7. pixel */ 5075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ 5085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload3] \n\t" 5105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */ 5115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ 5125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ 5135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ 5145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ 5155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ 5165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 8. pixel */ 5185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ 5195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ 5205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ 5215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ 5225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ 5235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ 5255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ 5265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ 5275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */ 5295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */ 5305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */ 5315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5327bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), 5337bc9febe8749e98a3812a0dc4380ceae75c29450Johann [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2), 5347bc9febe8749e98a3812a0dc4380ceae75c29450Johann [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), 5357bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), 5367bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) 5377bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [filter12] "r"(filter12), [filter34] "r"(filter34), 5387bc9febe8749e98a3812a0dc4380ceae75c29450Johann [filter56] "r"(filter56), [filter78] "r"(filter78), 5397bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), 5407bc9febe8749e98a3812a0dc4380ceae75c29450Johann [src] "r"(src)); 5415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += 16; 5435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 16; 5445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 5455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 5475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr += src_stride; 5485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr += dst_stride; 5495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 5505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 5515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5527bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void convolve_horiz_64_dspr2(const uint8_t *src_ptr, int32_t src_stride, 5537bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *dst_ptr, int32_t dst_stride, 5547bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_x0, int32_t h) { 5555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y, c; 5565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src; 5575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst; 558da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint8_t *cm = vpx_ff_cropTbl; 5595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector_64 = 64; 5605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t filter12, filter34, filter56, filter78; 5615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 5625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t qload1, qload2, qload3; 5635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, p5; 5645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st1, st2, st3; 5655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter12 = ((const int32_t *)filter_x0)[0]; 5675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter34 = ((const int32_t *)filter_x0)[1]; 5685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter56 = ((const int32_t *)filter_x0)[2]; 5695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter78 = ((const int32_t *)filter_x0)[3]; 5705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 5725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src = src_ptr; 5735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = dst_ptr; 5745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 576da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride); 577da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride + 32); 578da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src_ptr + src_stride + 64); 579da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst_ptr + dst_stride); 580da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst_ptr + dst_stride + 32); 5815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (c = 0; c < 4; c++) { 5837bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 5845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 0(%[src]) \n\t" 5855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 4(%[src]) \n\t" 5865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 5885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ 5895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ 5915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 5925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 5935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 5945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 5955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 5965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 8(%[src]) \n\t" 5975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ 5985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ 5995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ 6005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ 6015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ 6025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 6045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ 6055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 6065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 6075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 6085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 12(%[src]) \n\t" 6095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ 6105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ 6115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ 6125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ 6135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ 6145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ 6155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 6175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ 6185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 6195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 6205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[dst]) \n\t" /* even 1 */ 6215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ 6225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ 6235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ 6245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ 6255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ 6265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ 6275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 6295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ 6305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 6315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 6325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 2(%[dst]) \n\t" /* even 1 */ 6335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 16(%[src]) \n\t" 6345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ 6355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ 6365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ 6375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ 6385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ 6395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ 6405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 5. pixel */ 6425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ 6435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 6445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 6455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 4(%[dst]) \n\t" /* even 3 */ 6465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ 6475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ 6485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ 6495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ 6505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ 6515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ 6525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 6. pixel */ 6545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ 6555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 6565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 6575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 6(%[dst]) \n\t" /* even 4 */ 6585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 20(%[src]) \n\t" 6595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ 6605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ 6615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ 6625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ 6635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ 6645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ 6655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 7. pixel */ 6675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ 6685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 6695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload3] \n\t" 6705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 8(%[dst]) \n\t" /* even 5 */ 6715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ 6725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ 6735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ 6745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ 6755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ 6765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ 6775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 8. pixel */ 6795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ 6805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 6815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ 6825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ 6835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 10(%[dst]) \n\t" /* even 6 */ 6845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ 6855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ 6865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ 6875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ 6885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* ODD pixels */ 6905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 1(%[src]) \n\t" 6915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 5(%[src]) \n\t" 6925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 6945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ 6955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 6965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 6975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 6985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 6995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 7005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 12(%[dst]) \n\t" /* even 7 */ 7015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 9(%[src]) \n\t" 7025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ 7035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ 7045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ 7055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ 7065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ 7075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ 7085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 7105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ 7115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 7125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload3] \n\t" 7135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload3] \n\t" 7145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 14(%[dst]) \n\t" /* even 8 */ 7155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 13(%[src]) \n\t" 7165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ 7175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ 7185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ 7195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ 7205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ 7215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ 7225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 7245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ 7255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 7265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 7275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */ 7285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ 7295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ 7305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ 7315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ 7325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ 7335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ 7345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 7365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ 7375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 7385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 7395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */ 7405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 17(%[src]) \n\t" 7415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ 7425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ 7435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ 7445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ 7455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ 7465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ 7475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 5. pixel */ 7495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ 7505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 7515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 7525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */ 7535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ 7545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ 7555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ 7565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ 7575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ 7585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ 7595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 6. pixel */ 7615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ 7625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 7635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 7645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */ 7655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload3], 21(%[src]) \n\t" 7665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ 7675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ 7685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ 7695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ 7705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ 7715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ 7725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 7. pixel */ 7745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ 7755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 7765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload3] \n\t" 7775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */ 7785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ 7795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ 7805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ 7815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ 7825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ 7835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 8. pixel */ 7855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ 7865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ 7875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ 7885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ 7895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ 7905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ 7925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ 7935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ 7945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */ 7965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */ 7975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */ 7985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7997bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), 8007bc9febe8749e98a3812a0dc4380ceae75c29450Johann [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2), 8017bc9febe8749e98a3812a0dc4380ceae75c29450Johann [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), 8027bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), 8037bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) 8047bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [filter12] "r"(filter12), [filter34] "r"(filter34), 8057bc9febe8749e98a3812a0dc4380ceae75c29450Johann [filter56] "r"(filter56), [filter78] "r"(filter78), 8067bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), 8077bc9febe8749e98a3812a0dc4380ceae75c29450Johann [src] "r"(src)); 8085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += 16; 8105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 16; 8115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 8125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 8145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr += src_stride; 8155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr += dst_stride; 8165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 8175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 8185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 819da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, 8205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 821df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const InterpKernel *filter, int x0_q4, 822df37111358d02836cb29bbcb9c6e4c95dff90a16Johann int x_step_q4, int y0_q4, int y_step_q4, int w, 8237bc9febe8749e98a3812a0dc4380ceae75c29450Johann int h) { 824df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const int16_t *const filter_x = filter[x0_q4]; 825da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(x_step_q4 == 16); 826da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(((const int32_t *)filter_x)[1] != 0x800000); 827da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 828da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (((const int32_t *)filter_x)[0] == 0) { 829df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, 830df37111358d02836cb29bbcb9c6e4c95dff90a16Johann x_step_q4, y0_q4, y_step_q4, w, h); 8315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 832da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t pos = 38; 8335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 834da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load((const uint8_t *)filter_x); 835da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian src -= 3; 8365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 837da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* bit positon for extract from acc */ 8387bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__("wrdsp %[pos], 1 \n\t" 8397bc9febe8749e98a3812a0dc4380ceae75c29450Johann : 8407bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [pos] "r"(pos)); 8415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 842da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* prefetch data to cache memory */ 843da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src); 844da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + 32); 845da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst); 846da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 847da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian switch (w) { 848da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 4: 8497bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_horiz_4_dspr2(src, (int32_t)src_stride, dst, 8507bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filter_x, (int32_t)h); 851da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 852da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 8: 8537bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_horiz_8_dspr2(src, (int32_t)src_stride, dst, 8547bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filter_x, (int32_t)h); 855da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 856da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 16: 8577bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_horiz_16_dspr2(src, (int32_t)src_stride, dst, 8587bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filter_x, (int32_t)h, 1); 859da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 860da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 32: 8617bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_horiz_16_dspr2(src, (int32_t)src_stride, dst, 8627bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filter_x, (int32_t)h, 2); 863da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 864da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 64: 865da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + 64); 866da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + 32); 867da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 8687bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_horiz_64_dspr2(src, (int32_t)src_stride, dst, 8697bc9febe8749e98a3812a0dc4380ceae75c29450Johann (int32_t)dst_stride, filter_x, (int32_t)h); 870da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 871da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian default: 872df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vpx_convolve8_horiz_c(src + 3, src_stride, dst, dst_stride, filter, 873df37111358d02836cb29bbcb9c6e4c95dff90a16Johann x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); 874da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 8755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 8765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 8775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 8785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif 879