15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/* 25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * 45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Use of this source code is governed by a BSD-style license 55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * that can be found in the LICENSE file in the root of the source 65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * tree. An additional intellectual property rights grant can be found 75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * in the file PATENTS. All contributing project authors may 85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * be found in the AUTHORS file in the root of the source tree. 95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h> 125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h> 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vpx_config.h" 155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vp9_rtcd.h" 165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_common.h" 175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx/vpx_integer.h" 185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx_ports/mem.h" 195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_convolve.h" 205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" 215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2 235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_vert_4_dspr2(const uint8_t *src, 245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, 265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_y, 285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t w, 295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t x, y; 315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src_ptr; 325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst_ptr; 335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t load1, load2, load3, load4; 365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2; 375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t n1, n2; 385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t scratch1, scratch2; 395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t store1, store2; 405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t vector1b, vector2b, vector3b, vector4b; 415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2; 425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_y)[0]; 445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_y)[1]; 455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_y)[2]; 465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_y)[3]; 475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src -= 3 * src_stride; 495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < w; x += 4) { 555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr = src + x; 565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr = dst + x; 575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load1], 0(%[src_ptr]) \n\t" 605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load2], 0(%[src_ptr]) \n\t" 625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load3], 0(%[src_ptr]) \n\t" 645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load4], 0(%[src_ptr]) \n\t" 665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac0 \n\t" 685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac0 \n\t" 725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch1], %[load1] \n\t" 775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[load2] \n\t" 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch2], %[load3] \n\t" 815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[load4] \n\t" 825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" 865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" 875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" 885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" 895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch1], %[load1] \n\t" 915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[load2] \n\t" 925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch2], %[load3] \n\t" 955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[load4] \n\t" 965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" 1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load1], 0(%[src_ptr]) \n\t" 1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 1075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load2], 0(%[src_ptr]) \n\t" 1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load3], 0(%[src_ptr]) \n\t" 1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 1115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load4], 0(%[src_ptr]) \n\t" 1125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch1], %[load1] \n\t" 1145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[load2] \n\t" 1155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 1165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 1175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch2], %[load3] \n\t" 1185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[load4] \n\t" 1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" 1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" 1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac0, 31 \n\t" 1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" 1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" 1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac1, 31 \n\t" 1285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch1], %[load1] \n\t" 1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[load2] \n\t" 1315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 1325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch1], 0(%[dst_ptr]) \n\t" 1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch2], %[load3] \n\t" 1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[load4] \n\t" 1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch2], 1(%[dst_ptr]) \n\t" 1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store1], %[Temp1](%[cm]) \n\t" 1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ 1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac2, 31 \n\t" 1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store2], %[Temp2](%[cm]) \n\t" 1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" 1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" 1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ 1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch1], 2(%[dst_ptr]) \n\t" 1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store1], 0(%[dst_ptr]) \n\t" 1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store2], 1(%[dst_ptr]) \n\t" 1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch2], 3(%[dst_ptr]) \n\t" 1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store1], %[Temp1](%[cm]) \n\t" 1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store2], %[Temp2](%[cm]) \n\t" 1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ 1605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ 1615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store1], 2(%[dst_ptr]) \n\t" 1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store2], 3(%[dst_ptr]) \n\t" 1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [load1] "=&r" (load1), [load2] "=&r" (load2), 1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [load3] "=&r" (load3), [load4] "=&r" (load4), 1675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [n1] "=&r" (n1), [n2] "=&r" (n2), 1685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [scratch1] "=&r" (scratch1), [scratch2] "=&r" (scratch2), 1695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [store1] "=&r" (store1), [store2] "=&r" (store2), 1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [src_ptr] "+r" (src_ptr) 1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector3b] "r" (vector3b), [vector4b] "r" (vector4b), 1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector4a] "r" (vector4a), 1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [src_stride] "r" (src_stride), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) 1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 1805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 1815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 1845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_avg_vert_64_dspr2(const uint8_t *src, 1865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 1875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, 1885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 1895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_y, 1905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 1915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t x, y; 1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src_ptr; 1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst_ptr; 1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t load1, load2, load3, load4; 1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2; 1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t n1, n2; 1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t scratch1, scratch2; 2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t store1, store2; 2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t vector1b, vector2b, vector3b, vector4b; 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2; 2035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_y)[0]; 2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_y)[1]; 2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_y)[2]; 2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_y)[3]; 2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src -= 3 * src_stride; 2105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride + 32); 2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < 64; x += 4) { 2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr = src + x; 2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr = dst + x; 2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load1], 0(%[src_ptr]) \n\t" 2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load2], 0(%[src_ptr]) \n\t" 2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load3], 0(%[src_ptr]) \n\t" 2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load4], 0(%[src_ptr]) \n\t" 2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac0 \n\t" 2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac0 \n\t" 2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch1], %[load1] \n\t" 2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[load2] \n\t" 2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch2], %[load3] \n\t" 2435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[load4] \n\t" 2445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 2455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 2465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" 2485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" 2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" 2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" 2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch1], %[load1] \n\t" 2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[load2] \n\t" 2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch2], %[load3] \n\t" 2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[load4] \n\t" 2585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 2595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 2605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" 2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load1], 0(%[src_ptr]) \n\t" 2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load2], 0(%[src_ptr]) \n\t" 2705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load3], 0(%[src_ptr]) \n\t" 2725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 2735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[load4], 0(%[src_ptr]) \n\t" 2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch1], %[load1] \n\t" 2765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[load2] \n\t" 2775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 2785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[scratch2], %[load3] \n\t" 2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[load4] \n\t" 2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" 2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" 2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac0, 31 \n\t" 2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" 2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" 2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac1, 31 \n\t" 2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch1], %[load1] \n\t" 2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[load2] \n\t" 2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch1], 0(%[dst_ptr]) \n\t" 2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[scratch2], %[load3] \n\t" 2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[load4] \n\t" 2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch2], 1(%[dst_ptr]) \n\t" 3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store1], %[Temp1](%[cm]) \n\t" 3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ 3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac2, 31 \n\t" 3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store2], %[Temp2](%[cm]) \n\t" 3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" 3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" 3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ 3125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 3135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch1], 2(%[dst_ptr]) \n\t" 3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store1], 0(%[dst_ptr]) \n\t" 3165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store2], 1(%[dst_ptr]) \n\t" 3175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbu %[scratch2], 3(%[dst_ptr]) \n\t" 3185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store1], %[Temp1](%[cm]) \n\t" 3205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[store2], %[Temp2](%[cm]) \n\t" 3215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ 3225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ 3235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store1], 2(%[dst_ptr]) \n\t" 3255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[store2], 3(%[dst_ptr]) \n\t" 3265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [load1] "=&r" (load1), [load2] "=&r" (load2), 3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [load3] "=&r" (load3), [load4] "=&r" (load4), 3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [n1] "=&r" (n1), [n2] "=&r" (n2), 3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [scratch1] "=&r" (scratch1), [scratch2] "=&r" (scratch2), 3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [store1] "=&r" (store1), [store2] "=&r" (store2), 3335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [src_ptr] "+r" (src_ptr) 3345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 3355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector3b] "r" (vector3b), [vector4b] "r" (vector4b), 3365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector4a] "r" (vector4a), 3375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [src_stride] "r" (src_stride), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) 3385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 3395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 3425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 3435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 3445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 3465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, 3485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 3495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x, int x_step_q4, 3505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_y, int y_step_q4, 3515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int w, int h) { 3525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (((const int32_t *)filter_y)[1] == 0x800000) { 3535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve_avg(src, src_stride, 3545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 3555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 3565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 3575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 3585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else if (((const int32_t *)filter_y)[0] == 0) { 3595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve2_avg_vert_dspr2(src, src_stride, 3605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 3615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 3625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 3635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 3645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 3655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (16 == y_step_q4) { 3665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t pos = 38; 3675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* bit positon for extract from acc */ 3695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 3705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "wrdsp %[pos], 1 \n\t" 3715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : 3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [pos] "r" (pos) 3735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst); 3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang switch (w) { 3785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 4: 3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 8: 3805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 16: 3815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 32: 3825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_avg_vert_4_dspr2(src, src_stride, 3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, w, h); 3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 3865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 64: 3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + 32); 3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_avg_vert_64_dspr2(src, src_stride, 3895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 3905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, h); 3915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 3925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang default: 3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve8_avg_vert_c(src, src_stride, 3945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 3965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 3995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve8_avg_vert_c(src, src_stride, 4025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 4035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 4045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 4055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 4065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 4095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, 4115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 4125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x, int x_step_q4, 4135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_y, int y_step_q4, 4145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int w, int h) { 4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Fixed size intermediate buffer places limits on parameters. */ 4165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang DECLARE_ALIGNED_ARRAY(32, uint8_t, temp, 64 * 135); 4175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7; 4185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang assert(w <= 64); 4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang assert(h <= 64); 4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (intermediate_height < h) 4235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang intermediate_height = h; 4245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (x_step_q4 != 16 || y_step_q4 != 16) 4265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang return vp9_convolve8_avg_c(src, src_stride, 4275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 4285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 4295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 4305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 4315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve8_horiz(src - (src_stride * 3), src_stride, 4335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp, 64, 4345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 4355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, intermediate_height); 4375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve8_avg_vert(temp + 64 * 3, 64, 4395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 4405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 4415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 4425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 4435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 4445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, 4465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 4475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x, int filter_x_stride, 4485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_y, int filter_y_stride, 4495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int w, int h) { 4505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int x, y; 4515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2, tn1; 4525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp3, tp4, tn2; 4535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 4555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src); 4565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + 32); 4575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst); 4585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang switch (w) { 4605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 4: 4615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 1 word storage */ 4625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 4635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 4645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 4655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 4665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 4685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 4695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 4705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 4715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 0(%[dst]) \n\t" /* store */ 4725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1), 4745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp2] "=&r" (tp2) 4755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 4765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 4775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 4795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 4805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 4815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 4825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 8: 4835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 2 word storage */ 4845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 4855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 4865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 4875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 4885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 4905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 4915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 4925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 4(%[src]) \n\t" 4935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 4(%[dst]) \n\t" 4945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 4955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 0(%[dst]) \n\t" /* store */ 4965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 4975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 4(%[dst]) \n\t" /* store */ 4985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 5005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 5015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tn1] "=&r" (tn1), [tn2] "=&r" (tn2) 5025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 5035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 5045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 5065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 5075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 5085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 5095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 16: 5105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 4 word storage */ 5115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 5125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 5135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 5145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 5155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 5175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 5185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 5195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 4(%[src]) \n\t" 5205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 4(%[dst]) \n\t" 5215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 8(%[src]) \n\t" 5235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 8(%[dst]) \n\t" 5245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 0(%[dst]) \n\t" /* store */ 5255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 4(%[dst]) \n\t" /* store */ 5275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 12(%[src]) \n\t" 5285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[dst]) \n\t" 5295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 8(%[dst]) \n\t" /* store */ 5315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 12(%[dst]) \n\t" /* store */ 5335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 5355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 5365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tn1] "=&r" (tn1), [tn2] "=&r" (tn2) 5375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 5385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 5395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 5415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 5425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 5435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 5445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 32: 5455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 8 word storage */ 5465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 5475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 5485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 5495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 5505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 5525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 5535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 5545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 4(%[src]) \n\t" 5555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 4(%[dst]) \n\t" 5565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 8(%[src]) \n\t" 5585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 8(%[dst]) \n\t" 5595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 0(%[dst]) \n\t" /* store */ 5605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 4(%[dst]) \n\t" /* store */ 5625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 12(%[src]) \n\t" 5635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[dst]) \n\t" 5645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 16(%[src]) \n\t" 5665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 16(%[dst]) \n\t" 5675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 8(%[dst]) \n\t" /* store */ 5685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 12(%[dst]) \n\t" /* store */ 5705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 20(%[src]) \n\t" 5715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 20(%[dst]) \n\t" 5725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 24(%[src]) \n\t" 5745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 24(%[dst]) \n\t" 5755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 16(%[dst]) \n\t" /* store */ 5765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 20(%[dst]) \n\t" /* store */ 5785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 28(%[src]) \n\t" 5795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 28(%[dst]) \n\t" 5805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 5815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 24(%[dst]) \n\t" /* store */ 5825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 5835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 28(%[dst]) \n\t" /* store */ 5845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 5865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 5875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tn1] "=&r" (tn1), [tn2] "=&r" (tn2) 5885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 5895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 5905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 5925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 5935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 5945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 5955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 64: 5965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + 64); 5975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + 32); 5985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 16 word storage */ 6005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 6015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 6025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 6035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 64); 6045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 6055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride + 32); 6065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 6085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 6095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[dst]) \n\t" 6105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 4(%[src]) \n\t" 6115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 4(%[dst]) \n\t" 6125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 8(%[src]) \n\t" 6145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 8(%[dst]) \n\t" 6155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 0(%[dst]) \n\t" /* store */ 6165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 4(%[dst]) \n\t" /* store */ 6185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 12(%[src]) \n\t" 6195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[dst]) \n\t" 6205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 16(%[src]) \n\t" 6225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 16(%[dst]) \n\t" 6235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 8(%[dst]) \n\t" /* store */ 6245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 12(%[dst]) \n\t" /* store */ 6265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 20(%[src]) \n\t" 6275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 20(%[dst]) \n\t" 6285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 24(%[src]) \n\t" 6305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 24(%[dst]) \n\t" 6315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 16(%[dst]) \n\t" /* store */ 6325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 20(%[dst]) \n\t" /* store */ 6345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 28(%[src]) \n\t" 6355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 28(%[dst]) \n\t" 6365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 32(%[src]) \n\t" 6385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 32(%[dst]) \n\t" 6395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 24(%[dst]) \n\t" /* store */ 6405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 28(%[dst]) \n\t" /* store */ 6425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 36(%[src]) \n\t" 6435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 36(%[dst]) \n\t" 6445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 40(%[src]) \n\t" 6465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 40(%[dst]) \n\t" 6475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 32(%[dst]) \n\t" /* store */ 6485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 36(%[dst]) \n\t" /* store */ 6505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 44(%[src]) \n\t" 6515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 44(%[dst]) \n\t" 6525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 48(%[src]) \n\t" 6545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 48(%[dst]) \n\t" 6555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 40(%[dst]) \n\t" /* store */ 6565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 44(%[dst]) \n\t" /* store */ 6585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 52(%[src]) \n\t" 6595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 52(%[dst]) \n\t" 6605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 56(%[src]) \n\t" 6625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 56(%[dst]) \n\t" 6635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 48(%[dst]) \n\t" /* store */ 6645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 52(%[dst]) \n\t" /* store */ 6665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 60(%[src]) \n\t" 6675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 60(%[dst]) \n\t" 6685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 6695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn1], 56(%[dst]) \n\t" /* store */ 6705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 6715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tn2], 60(%[dst]) \n\t" /* store */ 6725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 6745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 6755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tn1] "=&r" (tn1), [tn2] "=&r" (tn2) 6765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 6775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 6785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 6805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 6815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 6835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang default: 6845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y > 0; --y) { 6855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < w; ++x) { 6865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst[x] = (dst[x] + src[x] + 1) >> 1; 6875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 6905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 6915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 6935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 6955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif 696