1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include <assert.h> 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include <stdio.h> 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vpx_config.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "./vp9_rtcd.h" 16233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_common.h" 17233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx/vpx_integer.h" 18233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_ports/mem.h" 19233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/vp9_convolve.h" 20233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" 21233d2500723e5594f3e7c70896ffeeef32b9c950ywan 22233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if HAVE_DSPR2 23233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void convolve_avg_vert_4_dspr2(const uint8_t *src, 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t src_stride, 25233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst, 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t dst_stride, 27233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_y, 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t w, 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t h) { 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t x, y; 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *src_ptr; 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst_ptr; 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *cm = vp9_ff_cropTbl; 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t vector4a = 64; 35233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t load1, load2, load3, load4; 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t p1, p2; 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t n1, n2; 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t scratch1, scratch2; 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t store1, store2; 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t vector1b, vector2b, vector3b, vector4b; 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t Temp1, Temp2; 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan vector1b = ((const int32_t *)filter_y)[0]; 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan vector2b = ((const int32_t *)filter_y)[1]; 45233d2500723e5594f3e7c70896ffeeef32b9c950ywan vector3b = ((const int32_t *)filter_y)[2]; 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan vector4b = ((const int32_t *)filter_y)[3]; 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan 48233d2500723e5594f3e7c70896ffeeef32b9c950ywan src -= 3 * src_stride; 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (y = h; y--;) { 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* prefetch data to cache memory */ 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride); 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (x = 0; x < w; x += 4) { 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr = src + x; 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr = dst + x; 57233d2500723e5594f3e7c70896ffeeef32b9c950ywan 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load1], 0(%[src_ptr]) \n\t" 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load2], 0(%[src_ptr]) \n\t" 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load3], 0(%[src_ptr]) \n\t" 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load4], 0(%[src_ptr]) \n\t" 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mtlo %[vector4a], $ac0 \n\t" 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mtlo %[vector4a], $ac1 \n\t" 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mtlo %[vector4a], $ac2 \n\t" 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mtlo %[vector4a], $ac3 \n\t" 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mthi $zero, $ac0 \n\t" 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mthi $zero, $ac1 \n\t" 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mthi $zero, $ac2 \n\t" 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mthi $zero, $ac3 \n\t" 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[scratch1], %[load1] \n\t" 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[p1], %[load2] \n\t" 78233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[scratch2], %[load3] \n\t" 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[p2], %[load4] \n\t" 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[scratch1], %[load1] \n\t" 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[p1], %[load2] \n\t" 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[scratch2], %[load3] \n\t" 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[p2], %[load4] \n\t" 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 98233d2500723e5594f3e7c70896ffeeef32b9c950ywan 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load1], 0(%[src_ptr]) \n\t" 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load2], 0(%[src_ptr]) \n\t" 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load3], 0(%[src_ptr]) \n\t" 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load4], 0(%[src_ptr]) \n\t" 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[scratch1], %[load1] \n\t" 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[p1], %[load2] \n\t" 115233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[scratch2], %[load3] \n\t" 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[p2], %[load4] \n\t" 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan 122233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" 123233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" 124233d2500723e5594f3e7c70896ffeeef32b9c950ywan "extp %[Temp1], $ac0, 31 \n\t" 125233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" 126233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" 127233d2500723e5594f3e7c70896ffeeef32b9c950ywan "extp %[Temp2], $ac1, 31 \n\t" 128233d2500723e5594f3e7c70896ffeeef32b9c950ywan 129233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[scratch1], %[load1] \n\t" 130233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[p1], %[load2] \n\t" 131233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 132233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 133233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbu %[scratch1], 0(%[dst_ptr]) \n\t" 134233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[scratch2], %[load3] \n\t" 135233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[p2], %[load4] \n\t" 136233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 137233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 138233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbu %[scratch2], 1(%[dst_ptr]) \n\t" 139233d2500723e5594f3e7c70896ffeeef32b9c950ywan 140233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbux %[store1], %[Temp1](%[cm]) \n\t" 141233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 142233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 143233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ 144233d2500723e5594f3e7c70896ffeeef32b9c950ywan "extp %[Temp1], $ac2, 31 \n\t" 145233d2500723e5594f3e7c70896ffeeef32b9c950ywan 146233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbux %[store2], %[Temp2](%[cm]) \n\t" 147233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" 148233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" 149233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ 150233d2500723e5594f3e7c70896ffeeef32b9c950ywan "extp %[Temp2], $ac3, 31 \n\t" 151233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbu %[scratch1], 2(%[dst_ptr]) \n\t" 152233d2500723e5594f3e7c70896ffeeef32b9c950ywan 153233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sb %[store1], 0(%[dst_ptr]) \n\t" 154233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sb %[store2], 1(%[dst_ptr]) \n\t" 155233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbu %[scratch2], 3(%[dst_ptr]) \n\t" 156233d2500723e5594f3e7c70896ffeeef32b9c950ywan 157233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbux %[store1], %[Temp1](%[cm]) \n\t" 158233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbux %[store2], %[Temp2](%[cm]) \n\t" 159233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ 160233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ 161233d2500723e5594f3e7c70896ffeeef32b9c950ywan 162233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sb %[store1], 2(%[dst_ptr]) \n\t" 163233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sb %[store2], 3(%[dst_ptr]) \n\t" 164233d2500723e5594f3e7c70896ffeeef32b9c950ywan 165233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [load1] "=&r" (load1), [load2] "=&r" (load2), 166233d2500723e5594f3e7c70896ffeeef32b9c950ywan [load3] "=&r" (load3), [load4] "=&r" (load4), 167233d2500723e5594f3e7c70896ffeeef32b9c950ywan [p1] "=&r" (p1), [p2] "=&r" (p2), [n1] "=&r" (n1), [n2] "=&r" (n2), 168233d2500723e5594f3e7c70896ffeeef32b9c950ywan [scratch1] "=&r" (scratch1), [scratch2] "=&r" (scratch2), 169233d2500723e5594f3e7c70896ffeeef32b9c950ywan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 170233d2500723e5594f3e7c70896ffeeef32b9c950ywan [store1] "=&r" (store1), [store2] "=&r" (store2), 171233d2500723e5594f3e7c70896ffeeef32b9c950ywan [src_ptr] "+r" (src_ptr) 172233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 173233d2500723e5594f3e7c70896ffeeef32b9c950ywan [vector3b] "r" (vector3b), [vector4b] "r" (vector4b), 174233d2500723e5594f3e7c70896ffeeef32b9c950ywan [vector4a] "r" (vector4a), 175233d2500723e5594f3e7c70896ffeeef32b9c950ywan [src_stride] "r" (src_stride), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) 176233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 177233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 178233d2500723e5594f3e7c70896ffeeef32b9c950ywan 179233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* Next row... */ 180233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 181233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 182233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 183233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 184233d2500723e5594f3e7c70896ffeeef32b9c950ywan 185233d2500723e5594f3e7c70896ffeeef32b9c950ywanstatic void convolve_avg_vert_64_dspr2(const uint8_t *src, 186233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t src_stride, 187233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst, 188233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t dst_stride, 189233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_y, 190233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t h) { 191233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t x, y; 192233d2500723e5594f3e7c70896ffeeef32b9c950ywan const uint8_t *src_ptr; 193233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst_ptr; 194233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *cm = vp9_ff_cropTbl; 195233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t vector4a = 64; 196233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t load1, load2, load3, load4; 197233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t p1, p2; 198233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t n1, n2; 199233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t scratch1, scratch2; 200233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t store1, store2; 201233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t vector1b, vector2b, vector3b, vector4b; 202233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t Temp1, Temp2; 203233d2500723e5594f3e7c70896ffeeef32b9c950ywan 204233d2500723e5594f3e7c70896ffeeef32b9c950ywan vector1b = ((const int32_t *)filter_y)[0]; 205233d2500723e5594f3e7c70896ffeeef32b9c950ywan vector2b = ((const int32_t *)filter_y)[1]; 206233d2500723e5594f3e7c70896ffeeef32b9c950ywan vector3b = ((const int32_t *)filter_y)[2]; 207233d2500723e5594f3e7c70896ffeeef32b9c950ywan vector4b = ((const int32_t *)filter_y)[3]; 208233d2500723e5594f3e7c70896ffeeef32b9c950ywan 209233d2500723e5594f3e7c70896ffeeef32b9c950ywan src -= 3 * src_stride; 210233d2500723e5594f3e7c70896ffeeef32b9c950ywan 211233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (y = h; y--;) { 212233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* prefetch data to cache memory */ 213233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride); 214233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride + 32); 215233d2500723e5594f3e7c70896ffeeef32b9c950ywan 216233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (x = 0; x < 64; x += 4) { 217233d2500723e5594f3e7c70896ffeeef32b9c950ywan src_ptr = src + x; 218233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst_ptr = dst + x; 219233d2500723e5594f3e7c70896ffeeef32b9c950ywan 220233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 221233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load1], 0(%[src_ptr]) \n\t" 222233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 223233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load2], 0(%[src_ptr]) \n\t" 224233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 225233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load3], 0(%[src_ptr]) \n\t" 226233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 227233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load4], 0(%[src_ptr]) \n\t" 228233d2500723e5594f3e7c70896ffeeef32b9c950ywan 229233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mtlo %[vector4a], $ac0 \n\t" 230233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mtlo %[vector4a], $ac1 \n\t" 231233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mtlo %[vector4a], $ac2 \n\t" 232233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mtlo %[vector4a], $ac3 \n\t" 233233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mthi $zero, $ac0 \n\t" 234233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mthi $zero, $ac1 \n\t" 235233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mthi $zero, $ac2 \n\t" 236233d2500723e5594f3e7c70896ffeeef32b9c950ywan "mthi $zero, $ac3 \n\t" 237233d2500723e5594f3e7c70896ffeeef32b9c950ywan 238233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[scratch1], %[load1] \n\t" 239233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[p1], %[load2] \n\t" 240233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 241233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 242233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[scratch2], %[load3] \n\t" 243233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[p2], %[load4] \n\t" 244233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 245233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 246233d2500723e5594f3e7c70896ffeeef32b9c950ywan 247233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" 248233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" 249233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" 250233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" 251233d2500723e5594f3e7c70896ffeeef32b9c950ywan 252233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[scratch1], %[load1] \n\t" 253233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[p1], %[load2] \n\t" 254233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 255233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 256233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[scratch2], %[load3] \n\t" 257233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[p2], %[load4] \n\t" 258233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 259233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 260233d2500723e5594f3e7c70896ffeeef32b9c950ywan 261233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" 262233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 263233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 264233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 265233d2500723e5594f3e7c70896ffeeef32b9c950ywan 266233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 267233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load1], 0(%[src_ptr]) \n\t" 268233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 269233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load2], 0(%[src_ptr]) \n\t" 270233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 271233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load3], 0(%[src_ptr]) \n\t" 272233d2500723e5594f3e7c70896ffeeef32b9c950ywan "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" 273233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[load4], 0(%[src_ptr]) \n\t" 274233d2500723e5594f3e7c70896ffeeef32b9c950ywan 275233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[scratch1], %[load1] \n\t" 276233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[p1], %[load2] \n\t" 277233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 278233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 279233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[scratch2], %[load3] \n\t" 280233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbr %[p2], %[load4] \n\t" 281233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 282233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 283233d2500723e5594f3e7c70896ffeeef32b9c950ywan 284233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" 285233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" 286233d2500723e5594f3e7c70896ffeeef32b9c950ywan "extp %[Temp1], $ac0, 31 \n\t" 287233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" 289233d2500723e5594f3e7c70896ffeeef32b9c950ywan "extp %[Temp2], $ac1, 31 \n\t" 290233d2500723e5594f3e7c70896ffeeef32b9c950ywan 291233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[scratch1], %[load1] \n\t" 292233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[p1], %[load2] \n\t" 293233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ 294233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ 295233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbu %[scratch1], 0(%[dst_ptr]) \n\t" 296233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[scratch2], %[load3] \n\t" 297233d2500723e5594f3e7c70896ffeeef32b9c950ywan "preceu.ph.qbl %[p2], %[load4] \n\t" 298233d2500723e5594f3e7c70896ffeeef32b9c950ywan "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ 299233d2500723e5594f3e7c70896ffeeef32b9c950ywan "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ 300233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbu %[scratch2], 1(%[dst_ptr]) \n\t" 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbux %[store1], %[Temp1](%[cm]) \n\t" 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 304233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan "extp %[Temp1], $ac2, 31 \n\t" 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbux %[store2], %[Temp2](%[cm]) \n\t" 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" 310233d2500723e5594f3e7c70896ffeeef32b9c950ywan "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan "extp %[Temp2], $ac3, 31 \n\t" 313233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbu %[scratch1], 2(%[dst_ptr]) \n\t" 314233d2500723e5594f3e7c70896ffeeef32b9c950ywan 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sb %[store1], 0(%[dst_ptr]) \n\t" 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sb %[store2], 1(%[dst_ptr]) \n\t" 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbu %[scratch2], 3(%[dst_ptr]) \n\t" 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan 319233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbux %[store1], %[Temp1](%[cm]) \n\t" 320233d2500723e5594f3e7c70896ffeeef32b9c950ywan "lbux %[store2], %[Temp2](%[cm]) \n\t" 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ 322233d2500723e5594f3e7c70896ffeeef32b9c950ywan "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ 323233d2500723e5594f3e7c70896ffeeef32b9c950ywan 324233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sb %[store1], 2(%[dst_ptr]) \n\t" 325233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sb %[store2], 3(%[dst_ptr]) \n\t" 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan 327233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [load1] "=&r" (load1), [load2] "=&r" (load2), 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan [load3] "=&r" (load3), [load4] "=&r" (load4), 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan [p1] "=&r" (p1), [p2] "=&r" (p2), [n1] "=&r" (n1), [n2] "=&r" (n2), 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan [scratch1] "=&r" (scratch1), [scratch2] "=&r" (scratch2), 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan [store1] "=&r" (store1), [store2] "=&r" (store2), 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan [src_ptr] "+r" (src_ptr) 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 335233d2500723e5594f3e7c70896ffeeef32b9c950ywan [vector3b] "r" (vector3b), [vector4b] "r" (vector4b), 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan [vector4a] "r" (vector4a), 337233d2500723e5594f3e7c70896ffeeef32b9c950ywan [src_stride] "r" (src_stride), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) 338233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 340233d2500723e5594f3e7c70896ffeeef32b9c950ywan 341233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* Next row... */ 342233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 343233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 344233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 345233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 346233d2500723e5594f3e7c70896ffeeef32b9c950ywan 347233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, 348233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst, ptrdiff_t dst_stride, 349233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_x, int x_step_q4, 350233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_y, int y_step_q4, 351233d2500723e5594f3e7c70896ffeeef32b9c950ywan int w, int h) { 352233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (((const int32_t *)filter_y)[1] == 0x800000) { 353233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve_avg(src, src_stride, 354233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, dst_stride, 355233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, 356233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, y_step_q4, 357233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h); 358233d2500723e5594f3e7c70896ffeeef32b9c950ywan } else if (((const int32_t *)filter_y)[0] == 0) { 359233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve2_avg_vert_dspr2(src, src_stride, 360233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, dst_stride, 361233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, 362233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, y_step_q4, 363233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h); 364233d2500723e5594f3e7c70896ffeeef32b9c950ywan } else { 365233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (16 == y_step_q4) { 366233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t pos = 38; 367233d2500723e5594f3e7c70896ffeeef32b9c950ywan 368233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* bit positon for extract from acc */ 369233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 370233d2500723e5594f3e7c70896ffeeef32b9c950ywan "wrdsp %[pos], 1 \n\t" 371233d2500723e5594f3e7c70896ffeeef32b9c950ywan : 372233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [pos] "r" (pos) 373233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 374233d2500723e5594f3e7c70896ffeeef32b9c950ywan 375233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst); 376233d2500723e5594f3e7c70896ffeeef32b9c950ywan 377233d2500723e5594f3e7c70896ffeeef32b9c950ywan switch (w) { 378233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 4: 379233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 8: 380233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 16: 381233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 32: 382233d2500723e5594f3e7c70896ffeeef32b9c950ywan convolve_avg_vert_4_dspr2(src, src_stride, 383233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, dst_stride, 384233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, w, h); 385233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 386233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 64: 387233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + 32); 388233d2500723e5594f3e7c70896ffeeef32b9c950ywan convolve_avg_vert_64_dspr2(src, src_stride, 389233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, dst_stride, 390233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, h); 391233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 392233d2500723e5594f3e7c70896ffeeef32b9c950ywan default: 393233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_avg_vert_c(src, src_stride, 394233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, dst_stride, 395233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, 396233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, y_step_q4, 397233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h); 398233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 399233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 400233d2500723e5594f3e7c70896ffeeef32b9c950ywan } else { 401233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_avg_vert_c(src, src_stride, 402233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, dst_stride, 403233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, 404233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, y_step_q4, 405233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h); 406233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 407233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 408233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 409233d2500723e5594f3e7c70896ffeeef32b9c950ywan 410233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, 411233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst, ptrdiff_t dst_stride, 412233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_x, int x_step_q4, 413233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_y, int y_step_q4, 414233d2500723e5594f3e7c70896ffeeef32b9c950ywan int w, int h) { 415233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* Fixed size intermediate buffer places limits on parameters. */ 416233d2500723e5594f3e7c70896ffeeef32b9c950ywan DECLARE_ALIGNED_ARRAY(32, uint8_t, temp, 64 * 135); 417233d2500723e5594f3e7c70896ffeeef32b9c950ywan int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7; 418233d2500723e5594f3e7c70896ffeeef32b9c950ywan 419233d2500723e5594f3e7c70896ffeeef32b9c950ywan assert(w <= 64); 420233d2500723e5594f3e7c70896ffeeef32b9c950ywan assert(h <= 64); 421233d2500723e5594f3e7c70896ffeeef32b9c950ywan 422233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (intermediate_height < h) 423233d2500723e5594f3e7c70896ffeeef32b9c950ywan intermediate_height = h; 424233d2500723e5594f3e7c70896ffeeef32b9c950ywan 425233d2500723e5594f3e7c70896ffeeef32b9c950ywan if (x_step_q4 != 16 || y_step_q4 != 16) 426233d2500723e5594f3e7c70896ffeeef32b9c950ywan return vp9_convolve8_avg_c(src, src_stride, 427233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, dst_stride, 428233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, 429233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, y_step_q4, 430233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h); 431233d2500723e5594f3e7c70896ffeeef32b9c950ywan 432233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_horiz(src - (src_stride * 3), src_stride, 433233d2500723e5594f3e7c70896ffeeef32b9c950ywan temp, 64, 434233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, 435233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, y_step_q4, 436233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, intermediate_height); 437233d2500723e5594f3e7c70896ffeeef32b9c950ywan 438233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_convolve8_avg_vert(temp + 64 * 3, 64, 439233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst, dst_stride, 440233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_x, x_step_q4, 441233d2500723e5594f3e7c70896ffeeef32b9c950ywan filter_y, y_step_q4, 442233d2500723e5594f3e7c70896ffeeef32b9c950ywan w, h); 443233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 444233d2500723e5594f3e7c70896ffeeef32b9c950ywan 445233d2500723e5594f3e7c70896ffeeef32b9c950ywanvoid vp9_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, 446233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint8_t *dst, ptrdiff_t dst_stride, 447233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_x, int filter_x_stride, 448233d2500723e5594f3e7c70896ffeeef32b9c950ywan const int16_t *filter_y, int filter_y_stride, 449233d2500723e5594f3e7c70896ffeeef32b9c950ywan int w, int h) { 450233d2500723e5594f3e7c70896ffeeef32b9c950ywan int x, y; 451233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t tp1, tp2, tn1; 452233d2500723e5594f3e7c70896ffeeef32b9c950ywan uint32_t tp3, tp4, tn2; 453233d2500723e5594f3e7c70896ffeeef32b9c950ywan 454233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* prefetch data to cache memory */ 455233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src); 456233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + 32); 457233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst); 458233d2500723e5594f3e7c70896ffeeef32b9c950ywan 459233d2500723e5594f3e7c70896ffeeef32b9c950ywan switch (w) { 460233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 4: 461233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* 1 word storage */ 462233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (y = h; y--; ) { 463233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride); 464233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride + 32); 465233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride); 466233d2500723e5594f3e7c70896ffeeef32b9c950ywan 467233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 468233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 0(%[src]) \n\t" 469233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 0(%[dst]) \n\t" 470233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 471233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 0(%[dst]) \n\t" /* store */ 472233d2500723e5594f3e7c70896ffeeef32b9c950ywan 473233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1), 474233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tp2] "=&r" (tp2) 475233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src), [dst] "r" (dst) 476233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 477233d2500723e5594f3e7c70896ffeeef32b9c950ywan 478233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 479233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 480233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 481233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 482233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 8: 483233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* 2 word storage */ 484233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (y = h; y--; ) { 485233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride); 486233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride + 32); 487233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride); 488233d2500723e5594f3e7c70896ffeeef32b9c950ywan 489233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 490233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 0(%[src]) \n\t" 491233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 0(%[dst]) \n\t" 492233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 4(%[src]) \n\t" 493233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 4(%[dst]) \n\t" 494233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 495233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 0(%[dst]) \n\t" /* store */ 496233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 497233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 4(%[dst]) \n\t" /* store */ 498233d2500723e5594f3e7c70896ffeeef32b9c950ywan 499233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 500233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 501233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tn1] "=&r" (tn1), [tn2] "=&r" (tn2) 502233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src), [dst] "r" (dst) 503233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 504233d2500723e5594f3e7c70896ffeeef32b9c950ywan 505233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 506233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 507233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 508233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 509233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 16: 510233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* 4 word storage */ 511233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (y = h; y--; ) { 512233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride); 513233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride + 32); 514233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride); 515233d2500723e5594f3e7c70896ffeeef32b9c950ywan 516233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 517233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 0(%[src]) \n\t" 518233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 0(%[dst]) \n\t" 519233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 4(%[src]) \n\t" 520233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 4(%[dst]) \n\t" 521233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 522233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 8(%[src]) \n\t" 523233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 8(%[dst]) \n\t" 524233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 0(%[dst]) \n\t" /* store */ 525233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 526233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 4(%[dst]) \n\t" /* store */ 527233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 12(%[src]) \n\t" 528233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 12(%[dst]) \n\t" 529233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 530233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 8(%[dst]) \n\t" /* store */ 531233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 532233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 12(%[dst]) \n\t" /* store */ 533233d2500723e5594f3e7c70896ffeeef32b9c950ywan 534233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 535233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 536233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tn1] "=&r" (tn1), [tn2] "=&r" (tn2) 537233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src), [dst] "r" (dst) 538233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 539233d2500723e5594f3e7c70896ffeeef32b9c950ywan 540233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 541233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 542233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 543233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 544233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 32: 545233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* 8 word storage */ 546233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (y = h; y--; ) { 547233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride); 548233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride + 32); 549233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride); 550233d2500723e5594f3e7c70896ffeeef32b9c950ywan 551233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 552233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 0(%[src]) \n\t" 553233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 0(%[dst]) \n\t" 554233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 4(%[src]) \n\t" 555233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 4(%[dst]) \n\t" 556233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 557233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 8(%[src]) \n\t" 558233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 8(%[dst]) \n\t" 559233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 0(%[dst]) \n\t" /* store */ 560233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 561233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 4(%[dst]) \n\t" /* store */ 562233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 12(%[src]) \n\t" 563233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 12(%[dst]) \n\t" 564233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 565233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 16(%[src]) \n\t" 566233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 16(%[dst]) \n\t" 567233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 8(%[dst]) \n\t" /* store */ 568233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 569233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 12(%[dst]) \n\t" /* store */ 570233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 20(%[src]) \n\t" 571233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 20(%[dst]) \n\t" 572233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 573233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 24(%[src]) \n\t" 574233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 24(%[dst]) \n\t" 575233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 16(%[dst]) \n\t" /* store */ 576233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 577233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 20(%[dst]) \n\t" /* store */ 578233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 28(%[src]) \n\t" 579233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 28(%[dst]) \n\t" 580233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 581233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 24(%[dst]) \n\t" /* store */ 582233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 583233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 28(%[dst]) \n\t" /* store */ 584233d2500723e5594f3e7c70896ffeeef32b9c950ywan 585233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 586233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 587233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tn1] "=&r" (tn1), [tn2] "=&r" (tn2) 588233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src), [dst] "r" (dst) 589233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 590233d2500723e5594f3e7c70896ffeeef32b9c950ywan 591233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 592233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 593233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 594233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 595233d2500723e5594f3e7c70896ffeeef32b9c950ywan case 64: 596233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + 64); 597233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + 32); 598233d2500723e5594f3e7c70896ffeeef32b9c950ywan 599233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* 16 word storage */ 600233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (y = h; y--; ) { 601233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride); 602233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride + 32); 603233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_load(src + src_stride + 64); 604233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride); 605233d2500723e5594f3e7c70896ffeeef32b9c950ywan vp9_prefetch_store(dst + dst_stride + 32); 606233d2500723e5594f3e7c70896ffeeef32b9c950ywan 607233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 608233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 0(%[src]) \n\t" 609233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 0(%[dst]) \n\t" 610233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 4(%[src]) \n\t" 611233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 4(%[dst]) \n\t" 612233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 613233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 8(%[src]) \n\t" 614233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 8(%[dst]) \n\t" 615233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 0(%[dst]) \n\t" /* store */ 616233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 617233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 4(%[dst]) \n\t" /* store */ 618233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 12(%[src]) \n\t" 619233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 12(%[dst]) \n\t" 620233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 621233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 16(%[src]) \n\t" 622233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 16(%[dst]) \n\t" 623233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 8(%[dst]) \n\t" /* store */ 624233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 625233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 12(%[dst]) \n\t" /* store */ 626233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 20(%[src]) \n\t" 627233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 20(%[dst]) \n\t" 628233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 629233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 24(%[src]) \n\t" 630233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 24(%[dst]) \n\t" 631233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 16(%[dst]) \n\t" /* store */ 632233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 633233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 20(%[dst]) \n\t" /* store */ 634233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 28(%[src]) \n\t" 635233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 28(%[dst]) \n\t" 636233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 637233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 32(%[src]) \n\t" 638233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 32(%[dst]) \n\t" 639233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 24(%[dst]) \n\t" /* store */ 640233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 641233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 28(%[dst]) \n\t" /* store */ 642233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 36(%[src]) \n\t" 643233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 36(%[dst]) \n\t" 644233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 645233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 40(%[src]) \n\t" 646233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 40(%[dst]) \n\t" 647233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 32(%[dst]) \n\t" /* store */ 648233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 649233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 36(%[dst]) \n\t" /* store */ 650233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 44(%[src]) \n\t" 651233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 44(%[dst]) \n\t" 652233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 653233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 48(%[src]) \n\t" 654233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 48(%[dst]) \n\t" 655233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 40(%[dst]) \n\t" /* store */ 656233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 657233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 44(%[dst]) \n\t" /* store */ 658233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 52(%[src]) \n\t" 659233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 52(%[dst]) \n\t" 660233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 661233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp1], 56(%[src]) \n\t" 662233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp2], 56(%[dst]) \n\t" 663233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 48(%[dst]) \n\t" /* store */ 664233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 665233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 52(%[dst]) \n\t" /* store */ 666233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp3], 60(%[src]) \n\t" 667233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[tp4], 60(%[dst]) \n\t" 668233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ 669233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn1], 56(%[dst]) \n\t" /* store */ 670233d2500723e5594f3e7c70896ffeeef32b9c950ywan "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ 671233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[tn2], 60(%[dst]) \n\t" /* store */ 672233d2500723e5594f3e7c70896ffeeef32b9c950ywan 673233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 674233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 675233d2500723e5594f3e7c70896ffeeef32b9c950ywan [tn1] "=&r" (tn1), [tn2] "=&r" (tn2) 676233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src), [dst] "r" (dst) 677233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 678233d2500723e5594f3e7c70896ffeeef32b9c950ywan 679233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 680233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 681233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 682233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 683233d2500723e5594f3e7c70896ffeeef32b9c950ywan default: 684233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (y = h; y > 0; --y) { 685233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (x = 0; x < w; ++x) { 686233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst[x] = (dst[x] + src[x] + 1) >> 1; 687233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 688233d2500723e5594f3e7c70896ffeeef32b9c950ywan 689233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 690233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 691233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 692233d2500723e5594f3e7c70896ffeeef32b9c950ywan break; 693233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 694233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 695233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif 696