15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/* 25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * 45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Use of this source code is governed by a BSD-style license 55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * that can be found in the LICENSE file in the root of the source 65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * tree. An additional intellectual property rights grant can be found 75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * in the file PATENTS. All contributing project authors may 85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * be found in the AUTHORS file in the root of the source tree. 95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h> 125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h> 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 14da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "./vpx_dsp_rtcd.h" 15da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/mips/convolve_common_dspr2.h" 16da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/vpx_convolve.h" 17da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian#include "vpx_dsp/vpx_dsp_common.h" 185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx_ports/mem.h" 195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2 217bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void convolve_avg_vert_4_dspr2(const uint8_t *src, int32_t src_stride, 227bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *dst, int32_t dst_stride, 237bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_y, int32_t w, 245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 257bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t x, y; 265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src_ptr; 277bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *dst_ptr; 287bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *cm = vpx_ff_cropTbl; 297bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t vector4a = 64; 307bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t load1, load2, load3, load4; 317bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t p1, p2; 327bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t n1, n2; 337bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t scratch1, scratch2; 347bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t store1, store2; 357bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t vector1b, vector2b, vector3b, vector4b; 367bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t Temp1, Temp2; 375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_y)[0]; 395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_y)[1]; 405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_y)[2]; 415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_y)[3]; 425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src -= 3 * src_stride; 445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 47da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < w; x += 4) { 505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr = src + x; 515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr = dst + x; 525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 537bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load1], 0(%[src_ptr]) \n\t" 555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load2], 0(%[src_ptr]) \n\t" 575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load3], 0(%[src_ptr]) \n\t" 595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load4], 0(%[src_ptr]) \n\t" 615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac0 \n\t" 635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac0 \n\t" 675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch1], %[load1] \n\t" 725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[load2] \n\t" 735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch2], %[load3] \n\t" 765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[load4] \n\t" 775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" 815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" 825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" 835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" 845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch1], %[load1] \n\t" 865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[load2] \n\t" 875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch2], %[load3] \n\t" 905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[load4] \n\t" 915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" 955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load1], 0(%[src_ptr]) \n\t" 1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load2], 0(%[src_ptr]) \n\t" 1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load3], 0(%[src_ptr]) \n\t" 1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load4], 0(%[src_ptr]) \n\t" 1075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch1], %[load1] \n\t" 1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[load2] \n\t" 1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 1115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 1125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch2], %[load3] \n\t" 1135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[load4] \n\t" 1145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 1155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 1165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" 1185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" 1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac0, 31 \n\t" 1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" 1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac1, 31 \n\t" 1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch1], %[load1] \n\t" 1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[load2] \n\t" 1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 1285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch1], 0(%[dst_ptr]) \n\t" 1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch2], %[load3] \n\t" 1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[load4] \n\t" 1315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 1325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch2], 1(%[dst_ptr]) \n\t" 1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store1], %[Temp1](%[cm]) \n\t" 1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ 1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac2, 31 \n\t" 1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store2], %[Temp2](%[cm]) \n\t" 1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" 1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ 1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch1], 2(%[dst_ptr]) \n\t" 1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store1], 0(%[dst_ptr]) \n\t" 1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store2], 1(%[dst_ptr]) \n\t" 1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch2], 3(%[dst_ptr]) \n\t" 1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store1], %[Temp1](%[cm]) \n\t" 1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store2], %[Temp2](%[cm]) \n\t" 1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ 1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ 1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store1], 2(%[dst_ptr]) \n\t" 1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store2], 3(%[dst_ptr]) \n\t" 1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1607bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), 1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2), 1627bc9febe8749e98a3812a0dc4380ceae75c29450Johann [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1), 1637bc9febe8749e98a3812a0dc4380ceae75c29450Johann [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1), 1647bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), 1657bc9febe8749e98a3812a0dc4380ceae75c29450Johann [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) 1667bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), 1677bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), 1687bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), 1697bc9febe8749e98a3812a0dc4380ceae75c29450Johann [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); 1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1787bc9febe8749e98a3812a0dc4380ceae75c29450Johannstatic void convolve_avg_vert_64_dspr2(const uint8_t *src, int32_t src_stride, 1797bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *dst, int32_t dst_stride, 1807bc9febe8749e98a3812a0dc4380ceae75c29450Johann const int16_t *filter_y, int32_t h) { 1817bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t x, y; 1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src_ptr; 1837bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *dst_ptr; 1847bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *cm = vpx_ff_cropTbl; 1857bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t vector4a = 64; 1867bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t load1, load2, load3, load4; 1877bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t p1, p2; 1887bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t n1, n2; 1897bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t scratch1, scratch2; 1907bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t store1, store2; 1917bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t vector1b, vector2b, vector3b, vector4b; 1927bc9febe8749e98a3812a0dc4380ceae75c29450Johann int32_t Temp1, Temp2; 1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_y)[0]; 1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_y)[1]; 1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_y)[2]; 1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_y)[3]; 1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src -= 3 * src_stride; 2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 203da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 204da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride + 32); 2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < 64; x += 4) { 2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr = src + x; 2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr = dst + x; 2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2107bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load1], 0(%[src_ptr]) \n\t" 2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load2], 0(%[src_ptr]) \n\t" 2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load3], 0(%[src_ptr]) \n\t" 2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load4], 0(%[src_ptr]) \n\t" 2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac0 \n\t" 2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac0 \n\t" 2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch1], %[load1] \n\t" 2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[load2] \n\t" 2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch2], %[load3] \n\t" 2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[load4] \n\t" 2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" 2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" 2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" 2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" 2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch1], %[load1] \n\t" 2435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[load2] \n\t" 2445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 2455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 2465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch2], %[load3] \n\t" 2475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[load4] \n\t" 2485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" 2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load1], 0(%[src_ptr]) \n\t" 2585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load2], 0(%[src_ptr]) \n\t" 2605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load3], 0(%[src_ptr]) \n\t" 2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load4], 0(%[src_ptr]) \n\t" 2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch1], %[load1] \n\t" 2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[load2] \n\t" 2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch2], %[load3] \n\t" 2705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[load4] \n\t" 2715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 2725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 2735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" 2755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" 2765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac0, 31 \n\t" 2775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" 2785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" 2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac1, 31 \n\t" 2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch1], %[load1] \n\t" 2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[load2] \n\t" 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch1], 0(%[dst_ptr]) \n\t" 2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch2], %[load3] \n\t" 2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[load4] \n\t" 2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch2], 1(%[dst_ptr]) \n\t" 2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store1], %[Temp1](%[cm]) \n\t" 2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ 2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac2, 31 \n\t" 2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store2], %[Temp2](%[cm]) \n\t" 2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" 3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" 3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ 3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch1], 2(%[dst_ptr]) \n\t" 3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store1], 0(%[dst_ptr]) \n\t" 3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store2], 1(%[dst_ptr]) \n\t" 3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch2], 3(%[dst_ptr]) \n\t" 3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store1], %[Temp1](%[cm]) \n\t" 3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store2], %[Temp2](%[cm]) \n\t" 3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ 3125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ 3135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store1], 2(%[dst_ptr]) \n\t" 3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store2], 3(%[dst_ptr]) \n\t" 3165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3177bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), 3187bc9febe8749e98a3812a0dc4380ceae75c29450Johann [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2), 3197bc9febe8749e98a3812a0dc4380ceae75c29450Johann [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1), 3207bc9febe8749e98a3812a0dc4380ceae75c29450Johann [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1), 3217bc9febe8749e98a3812a0dc4380ceae75c29450Johann [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), 3227bc9febe8749e98a3812a0dc4380ceae75c29450Johann [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) 3237bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), 3247bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), 3257bc9febe8749e98a3812a0dc4380ceae75c29450Johann [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), 3267bc9febe8749e98a3812a0dc4380ceae75c29450Johann [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); 3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 3345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 335da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, 3365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 337df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const InterpKernel *filter, int x0_q4, 338df37111358d02836cb29bbcb9c6e4c95dff90a16Johann int32_t x_step_q4, int y0_q4, int y_step_q4, 339df37111358d02836cb29bbcb9c6e4c95dff90a16Johann int w, int h) { 340df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const int16_t *const filter_y = filter[y0_q4]; 341da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(y_step_q4 == 16); 342da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(((const int32_t *)filter_y)[1] != 0x800000); 343da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 344da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian if (((const int32_t *)filter_y)[0] == 0) { 345df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter, 346df37111358d02836cb29bbcb9c6e4c95dff90a16Johann x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); 3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 348da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian uint32_t pos = 38; 349da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 350da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian /* bit positon for extract from acc */ 3517bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__("wrdsp %[pos], 1 \n\t" 3527bc9febe8749e98a3812a0dc4380ceae75c29450Johann : 3537bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [pos] "r"(pos)); 354da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 355da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst); 356da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian 357da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian switch (w) { 358da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 4: 359da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 8: 360da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 16: 361da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 32: 3627bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_avg_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w, 3637bc9febe8749e98a3812a0dc4380ceae75c29450Johann h); 364da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 365da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian case 64: 366da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + 32); 3677bc9febe8749e98a3812a0dc4380ceae75c29450Johann convolve_avg_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, 3687bc9febe8749e98a3812a0dc4380ceae75c29450Johann h); 369da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 370da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian default: 371df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, 372df37111358d02836cb29bbcb9c6e4c95dff90a16Johann x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); 373da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian break; 3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 378da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, 3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 380df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const InterpKernel *filter, int x0_q4, 381df37111358d02836cb29bbcb9c6e4c95dff90a16Johann int32_t x_step_q4, int y0_q4, int y_step_q4, int w, 3827bc9febe8749e98a3812a0dc4380ceae75c29450Johann int h) { 3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Fixed size intermediate buffer places limits on parameters. */ 384da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]); 3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7; 3865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang assert(w <= 64); 3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang assert(h <= 64); 389da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(x_step_q4 == 16); 390da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian assert(y_step_q4 == 16); 3915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3927bc9febe8749e98a3812a0dc4380ceae75c29450Johann if (intermediate_height < h) intermediate_height = h; 3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 394df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vpx_convolve8_horiz(src - (src_stride * 3), src_stride, temp, 64, filter, 395df37111358d02836cb29bbcb9c6e4c95dff90a16Johann x0_q4, x_step_q4, y0_q4, y_step_q4, w, 396df37111358d02836cb29bbcb9c6e4c95dff90a16Johann intermediate_height); 3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 398df37111358d02836cb29bbcb9c6e4c95dff90a16Johann vpx_convolve8_avg_vert(temp + 64 * 3, 64, dst, dst_stride, filter, x0_q4, 399df37111358d02836cb29bbcb9c6e4c95dff90a16Johann x_step_q4, y0_q4, y_step_q4, w, h); 4005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 402da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanianvoid vpx_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, 4035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 404df37111358d02836cb29bbcb9c6e4c95dff90a16Johann const InterpKernel *filter, int x0_q4, 405df37111358d02836cb29bbcb9c6e4c95dff90a16Johann int32_t x_step_q4, int y0_q4, int y_step_q4, int w, 4067bc9febe8749e98a3812a0dc4380ceae75c29450Johann int h) { 4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int x, y; 4087bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t tp1, tp2, tn1, tp3, tp4, tn2; 409df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)filter; 410df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)x0_q4; 411df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)x_step_q4; 412df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)y0_q4; 413df37111358d02836cb29bbcb9c6e4c95dff90a16Johann (void)y_step_q4; 4145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 416da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src); 417da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + 32); 418da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst); 4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang switch (w) { 4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 4: 4225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 1 word storage */ 4237bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (y = h; y--;) { 424da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 425da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 426da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 4275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4287bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 4295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 4305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 4317bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 4327bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 0(%[dst]) \n\t" /* store */ 4335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4347bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tn1] "=&r"(tn1), [tp1] "=&r"(tp1), [tp2] "=&r"(tp2) 4357bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [src] "r"(src), [dst] "r"(dst)); 4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 4385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 4395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 4415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 8: 4425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 2 word storage */ 4437bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (y = h; y--;) { 444da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 445da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 446da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 4475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4487bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 4495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 4505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 4515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 4(%[src]) \n\t" 4525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 4(%[dst]) \n\t" 4537bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 4547bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 0(%[dst]) \n\t" /* store */ 4557bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 4567bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 4(%[dst]) \n\t" /* store */ 4575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4587bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), 4597bc9febe8749e98a3812a0dc4380ceae75c29450Johann [tp4] "=&r"(tp4), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2) 4607bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [src] "r"(src), [dst] "r"(dst)); 4615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 4635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 4645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 4665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 16: 4675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 4 word storage */ 4687bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (y = h; y--;) { 469da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 470da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 471da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 4725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4737bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 4745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 4755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 4765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 4(%[src]) \n\t" 4775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 4(%[dst]) \n\t" 4787bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 4795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 8(%[src]) \n\t" 4805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 8(%[dst]) \n\t" 4817bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 0(%[dst]) \n\t" /* store */ 4827bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 4837bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 4(%[dst]) \n\t" /* store */ 4845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 12(%[src]) \n\t" 4855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[dst]) \n\t" 4867bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 4877bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 8(%[dst]) \n\t" /* store */ 4887bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 4897bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 12(%[dst]) \n\t" /* store */ 4905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4917bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), 4927bc9febe8749e98a3812a0dc4380ceae75c29450Johann [tp4] "=&r"(tp4), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2) 4937bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [src] "r"(src), [dst] "r"(dst)); 4945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 4965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 4975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 4995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 32: 5005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 8 word storage */ 5017bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (y = h; y--;) { 502da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 503da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 504da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 5055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5067bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 5075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 5085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 5095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 4(%[src]) \n\t" 5105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 4(%[dst]) \n\t" 5117bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 8(%[src]) \n\t" 5135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 8(%[dst]) \n\t" 5147bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 0(%[dst]) \n\t" /* store */ 5157bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5167bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 4(%[dst]) \n\t" /* store */ 5175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 12(%[src]) \n\t" 5185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[dst]) \n\t" 5197bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 16(%[src]) \n\t" 5215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 16(%[dst]) \n\t" 5227bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 8(%[dst]) \n\t" /* store */ 5237bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5247bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 12(%[dst]) \n\t" /* store */ 5255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 20(%[src]) \n\t" 5265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 20(%[dst]) \n\t" 5277bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 24(%[src]) \n\t" 5295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 24(%[dst]) \n\t" 5307bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 16(%[dst]) \n\t" /* store */ 5317bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5327bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 20(%[dst]) \n\t" /* store */ 5335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 28(%[src]) \n\t" 5345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 28(%[dst]) \n\t" 5357bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5367bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 24(%[dst]) \n\t" /* store */ 5377bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5387bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 28(%[dst]) \n\t" /* store */ 5395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5407bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), 5417bc9febe8749e98a3812a0dc4380ceae75c29450Johann [tp4] "=&r"(tp4), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2) 5427bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [src] "r"(src), [dst] "r"(dst)); 5435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 5455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 5465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 5475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 5485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 64: 549da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + 64); 550da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + 32); 5515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 16 word storage */ 5537bc9febe8749e98a3812a0dc4380ceae75c29450Johann for (y = h; y--;) { 554da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride); 555da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 32); 556da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_load(src + src_stride + 64); 557da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride); 558da49e34c1fb5e99681f4ad99c21d9cfd83eddb96Vignesh Venkatasubramanian prefetch_store(dst + dst_stride + 32); 5595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5607bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 5615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 5625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 5635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 4(%[src]) \n\t" 5645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 4(%[dst]) \n\t" 5657bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 8(%[src]) \n\t" 5675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 8(%[dst]) \n\t" 5687bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 0(%[dst]) \n\t" /* store */ 5697bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5707bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 4(%[dst]) \n\t" /* store */ 5715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 12(%[src]) \n\t" 5725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[dst]) \n\t" 5737bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 16(%[src]) \n\t" 5755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 16(%[dst]) \n\t" 5767bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 8(%[dst]) \n\t" /* store */ 5777bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5787bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 12(%[dst]) \n\t" /* store */ 5795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 20(%[src]) \n\t" 5805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 20(%[dst]) \n\t" 5817bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 24(%[src]) \n\t" 5835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 24(%[dst]) \n\t" 5847bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 16(%[dst]) \n\t" /* store */ 5857bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5867bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 20(%[dst]) \n\t" /* store */ 5875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 28(%[src]) \n\t" 5885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 28(%[dst]) \n\t" 5897bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 32(%[src]) \n\t" 5915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 32(%[dst]) \n\t" 5927bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 24(%[dst]) \n\t" /* store */ 5937bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5947bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 28(%[dst]) \n\t" /* store */ 5955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 36(%[src]) \n\t" 5965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 36(%[dst]) \n\t" 5977bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 40(%[src]) \n\t" 5995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 40(%[dst]) \n\t" 6007bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 32(%[dst]) \n\t" /* store */ 6017bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6027bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 36(%[dst]) \n\t" /* store */ 6035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 44(%[src]) \n\t" 6045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 44(%[dst]) \n\t" 6057bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 48(%[src]) \n\t" 6075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 48(%[dst]) \n\t" 6087bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 40(%[dst]) \n\t" /* store */ 6097bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6107bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 44(%[dst]) \n\t" /* store */ 6115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 52(%[src]) \n\t" 6125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 52(%[dst]) \n\t" 6137bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 56(%[src]) \n\t" 6155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 56(%[dst]) \n\t" 6167bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 48(%[dst]) \n\t" /* store */ 6177bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6187bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 52(%[dst]) \n\t" /* store */ 6195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 60(%[src]) \n\t" 6205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 60(%[dst]) \n\t" 6217bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6227bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn1], 56(%[dst]) \n\t" /* store */ 6237bc9febe8749e98a3812a0dc4380ceae75c29450Johann "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6247bc9febe8749e98a3812a0dc4380ceae75c29450Johann "sw %[tn2], 60(%[dst]) \n\t" /* store */ 6257bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6267bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), 6277bc9febe8749e98a3812a0dc4380ceae75c29450Johann [tp4] "=&r"(tp4), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2) 6287bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [src] "r"(src), [dst] "r"(dst)); 6295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 6315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 6325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 6345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang default: 6355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y > 0; --y) { 6365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < w; ++x) { 6375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst[x] = (dst[x] + src[x] + 1) >> 1; 6385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 6415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 6425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 6445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 6465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif 647