15ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang/* 25ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 35ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * 45ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * Use of this source code is governed by a BSD-style license 55ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * that can be found in the LICENSE file in the root of the source 65ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * tree. An additional intellectual property rights grant can be found 75ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * in the file PATENTS. All contributing project authors may 85ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang * be found in the AUTHORS file in the root of the source tree. 95ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang */ 105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <assert.h> 125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include <stdio.h> 135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vpx_config.h" 155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "./vp9_rtcd.h" 165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_common.h" 175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx/vpx_integer.h" 185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vpx_ports/mem.h" 195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/vp9_filter.h" 205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" 215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#if HAVE_DSPR2 235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuanguint8_t vp9_ff_cropTbl_a[256 + 2 * CROP_WIDTH]; 245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuanguint8_t *vp9_ff_cropTbl; 255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_dsputil_static_init(void) { 275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int i; 285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < 256; i++) vp9_ff_cropTbl_a[i + CROP_WIDTH] = i; 305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (i = 0; i < CROP_WIDTH; i++) { 325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_ff_cropTbl_a[i] = 0; 335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_ff_cropTbl_a[i + CROP_WIDTH + 256] = 255; 345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_ff_cropTbl = &vp9_ff_cropTbl_a[CROP_WIDTH]; 375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_horiz_4_transposed_dspr2(const uint8_t *src, 405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, 425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x0, 445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y; 465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst_ptr; 485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t vector1b, vector2b, vector3b, vector4b; 495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3, Temp4; 505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2; 525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4; 535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tn1, tn2; 545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_x0)[0]; 565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_x0)[1]; 575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_x0)[2]; 585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_x0)[3]; 595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr = dst; 625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp1] \n\t" 745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp1] \n\t" 755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp2] \n\t" 765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp2] \n\t" 775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tn2], 8(%[src]) \n\t" 815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac3, 31 \n\t" 835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tn2] \n\t" 885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn1], %[tn2], 3 \n\t" 895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tn2], %[tp2], 3 \n\t" 905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "balign %[tp2], %[tp1], 3 \n\t" 915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" 955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp1], %[Temp1](%[cm]) \n\t" 995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 1015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp2] \n\t" 1025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp2] \n\t" 1035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tn2] \n\t" 1045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tn2] \n\t" 1055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 1065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 1075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 1085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 1095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 1105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 1125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp2], %[Temp3](%[cm]) \n\t" 1135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 1145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 1155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tn1] \n\t" 1165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 1175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 1185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 1195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" 1205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp4], $ac2, 31 \n\t" 1215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* clamp */ 1235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tn1], %[Temp2](%[cm]) \n\t" 1245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p2], %[Temp4](%[cm]) \n\t" 1255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* store bytes */ 1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp1], 0(%[dst_ptr]) \n\t" 1285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" 1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tn1], 0(%[dst_ptr]) \n\t" 1315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" 1325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp2], 0(%[dst_ptr]) \n\t" 1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" 1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[p2], 0(%[dst_ptr]) \n\t" 1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" 1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), [tn2] "=&r" (tn2), 1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4), 1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), 1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [dst_ptr] "+r" (dst_ptr) 1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 1445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector3b] "r" (vector3b), [vector4b] "r" (vector4b), 1455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector4a] "r" (vector4a), 1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [cm] "r" (cm), [src] "r" (src), [dst_stride] "r" (dst_stride) 1475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 1485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 1505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 1515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 1; 1525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 1535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 1545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_horiz_8_transposed_dspr2(const uint8_t *src, 1565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 1575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, 1585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 1595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x0, 1605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 1615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t y; 1625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 1635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst_ptr; 1645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector4a = 64; 1655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t vector1b, vector2b, vector3b, vector4b; 1665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 1675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2, tp3; 1685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, n1; 1695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *odd_dst; 1705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t dst_pitch_2 = (dst_stride << 1); 1715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector1b = ((const int32_t *)filter_x0)[0]; 1735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector2b = ((const int32_t *)filter_x0)[1]; 1745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector3b = ((const int32_t *)filter_x0)[2]; 1755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vector4b = ((const int32_t *)filter_x0)[3]; 1765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 1785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 1795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 1805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 1815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr = dst; 1835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang odd_dst = (dst_ptr + dst_stride); 1845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 1865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 0(%[src]) \n\t" 1875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 4(%[src]) \n\t" 1885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 1895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 1905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 1915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 1925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 1935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 1945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp2] \n\t" 1955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp2] \n\t" 1965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp1] \n\t" 1975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp1] \n\t" 1985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 8(%[src]) \n\t" 1995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 2005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 2015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 2025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 2035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac3, 31 \n\t" 2045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 2065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp3] \n\t" 2075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n1], %[tp3] \n\t" 2085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 12(%[src]) \n\t" 2095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 2105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 2115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 2125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" 2135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 2145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 2165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[Temp2], %[Temp1](%[cm]) \n\t" 2175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 2185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 2195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[tp2] \n\t" 2205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[vector1b] \n\t" 2215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[vector2b] \n\t" 2225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[vector3b] \n\t" 2235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp3], %[Temp3](%[cm]) \n\t" 2245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[n1], %[vector4b] \n\t" 2255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[p3], $ac1, 31 \n\t" 2265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 2285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 2295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 2315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 2325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[Temp2], 0(%[dst_ptr]) \n\t" 2335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" 2345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp3], 0(%[dst_ptr]) \n\t" 2355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" 2365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 1(%[src]) \n\t" 2385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 5(%[src]) \n\t" 2395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" 2415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" 2425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 2435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 2445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac2, 31 \n\t" 2455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp2], %[p3](%[cm]) \n\t" 2475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 2495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac1 \n\t" 2505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 2515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp1] \n\t" 2525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[tp1] \n\t" 2535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[tp3] \n\t" 2545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[tp3] \n\t" 2555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp2], 0(%[dst_ptr]) \n\t" 2565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" 2575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 9(%[src]) \n\t" 2585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 2605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 2615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 2625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" 2635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 2665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp1], %[Temp3](%[cm]) \n\t" 2675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac3 \n\t" 2685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 2695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector4a], $ac2 \n\t" 2705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 2715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[tp2] \n\t" 2725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[n1], %[tp2] \n\t" 2735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[Temp1], 13(%[src]) \n\t" 2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[vector1b] \n\t" 2755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp1], 0(%[dst_ptr]) \n\t" 2765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" 2775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[vector2b] \n\t" 2785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[vector3b] \n\t" 2795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[vector4b] \n\t" 2805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac1, 31 \n\t" 2815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 2835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[tp3], %[Temp2](%[cm]) \n\t" 2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[Temp1] \n\t" 2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" 2865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" 2875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" 2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[n1], %[vector4b] \n\t" 2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac3, 31 \n\t" 2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[tp3], 0(%[odd_dst]) \n\t" 2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" 2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" 2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 2975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" 2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac2, 31 \n\t" 2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* clamp */ 3015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p4], %[Temp3](%[cm]) \n\t" 3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[p2], %[Temp2](%[cm]) \n\t" 3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[n1], %[Temp1](%[cm]) \n\t" 3045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* store bytes */ 3065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[p4], 0(%[odd_dst]) \n\t" 3075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 3085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[p2], 0(%[odd_dst]) \n\t" 3105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 3115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[n1], 0(%[odd_dst]) \n\t" 3135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tp3] "=&r" (tp3), 3155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4), 3165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [n1] "=&r" (n1), 3175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 3185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [dst_ptr] "+r" (dst_ptr), [odd_dst] "+r" (odd_dst) 3195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 3205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector3b] "r" (vector3b), [vector4b] "r" (vector4b), 3215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector4a] "r" (vector4a), [cm] "r" (cm), 3225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [src] "r" (src), [dst_pitch_2] "r" (dst_pitch_2) 3235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 3245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 3265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 3275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 1; 3285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 3305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_horiz_16_transposed_dspr2(const uint8_t *src_ptr, 3325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 3335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst_ptr, 3345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 3355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x0, 3365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h, 3375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t count) { 3385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t c, y; 3395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src; 3405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst; 3415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 3425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector_64 = 64; 3435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t filter12, filter34, filter56, filter78; 3445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 3455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t qload1, qload2; 3465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, p5; 3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st1, st2, st3; 3485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t dst_pitch_2 = (dst_stride << 1); 3495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *odd_dst; 3505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter12 = ((const int32_t *)filter_x0)[0]; 3525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter34 = ((const int32_t *)filter_x0)[1]; 3535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter56 = ((const int32_t *)filter_x0)[2]; 3545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter78 = ((const int32_t *)filter_x0)[3]; 3555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 3575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 3585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride); 3595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride + 32); 3605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src = src_ptr; 3625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = dst_ptr; 3635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang odd_dst = (dst + dst_stride); 3655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (c = 0; c < count; c++) { 3675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 3685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 0(%[src]) \n\t" 3695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 4(%[src]) \n\t" 3705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ 3735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 3745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ 3755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 3765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 3775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 3785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 3795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 3805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 8(%[src]) \n\t" 3815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ 3825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ 3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ 3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ 3855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ 3865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ 3895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 3905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload2] \n\t" 3915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload2] \n\t" 3925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 12(%[src]) \n\t" 3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ 3945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ 3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ 3965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ 3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ 3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ 3995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ 4025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 4045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[dst]) \n\t" /* even 1 */ 4055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 4065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ 4075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ 4085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ 4095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ 4105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ 4115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ 4125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 4145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ 4155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 4175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* even 2 */ 4185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 4195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 16(%[src]) \n\t" 4205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ 4215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ 4225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ 4235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ 4245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ 4255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ 4265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 5. pixel */ 4285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ 4295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 4315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[dst]) \n\t" /* even 3 */ 4325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 4335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ 4345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ 4355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ 4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ 4375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ 4385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ 4395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 6. pixel */ 4415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ 4425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 4445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[dst]) \n\t" /* even 4 */ 4455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 4465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 20(%[src]) \n\t" 4475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ 4485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ 4495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ 4505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ 4515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ 4525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ 4535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 7. pixel */ 4555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ 4565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 4575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload1] \n\t" 4585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* even 5 */ 4595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 4605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ 4615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ 4625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ 4635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ 4645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ 4655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ 4665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 8. pixel */ 4685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ 4695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 4705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ 4715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ 4725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[dst]) \n\t" /* even 6 */ 4735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 4745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ 4755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ 4765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ 4775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ 4785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* ODD pixels */ 4805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 1(%[src]) \n\t" 4815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 5(%[src]) \n\t" 4825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 4835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 4845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ 4855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 4865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 4875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 4885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 4895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 4905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[dst]) \n\t" /* even 7 */ 4915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 4925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 9(%[src]) \n\t" 4935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ 4945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ 4955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ 4965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ 4975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ 4985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ 4995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 5015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ 5025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 5035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload2] \n\t" 5045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload2] \n\t" 5055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* even 8 */ 5065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 13(%[src]) \n\t" 5075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ 5085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ 5095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ 5105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ 5115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ 5125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ 5135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 5155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ 5165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 5175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 5185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[odd_dst]) \n\t" /* odd 1 */ 5195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 5205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ 5215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ 5225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ 5235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ 5245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ 5255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ 5265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 5285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ 5295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 5315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[odd_dst]) \n\t" /* odd 2 */ 5325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 5335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 17(%[src]) \n\t" 5345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ 5355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ 5365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ 5375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ 5385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ 5395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ 5405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 5. pixel */ 5425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ 5435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 5445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 5455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[odd_dst]) \n\t" /* odd 3 */ 5465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 5475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ 5485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ 5495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ 5505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ 5515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ 5525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ 5535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 6. pixel */ 5555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ 5565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 5575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 5585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[odd_dst]) \n\t" /* odd 4 */ 5595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 5605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 21(%[src]) \n\t" 5615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ 5625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ 5635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ 5645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ 5655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ 5665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ 5675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 7. pixel */ 5695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ 5705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 5715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload1] \n\t" 5725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[odd_dst]) \n\t" /* odd 5 */ 5735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 5745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ 5755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ 5765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ 5775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ 5785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ 5795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 8. pixel */ 5815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ 5825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ 5835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ 5845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ 5855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ 5865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ 5885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ 5895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ 5905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[odd_dst]) \n\t" /* odd 6 */ 5925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 5935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[odd_dst]) \n\t" /* odd 7 */ 5955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 5965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[odd_dst]) \n\t" /* odd 8 */ 5985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 5995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [qload1] "=&r" (qload1), [qload2] "=&r" (qload2), [p5] "=&r" (p5), 6005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [st1] "=&r" (st1), [st2] "=&r" (st2), [st3] "=&r" (st3), 6015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4), 6025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 6035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [dst] "+r" (dst), [odd_dst] "+r" (odd_dst) 6045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [filter12] "r" (filter12), [filter34] "r" (filter34), 6055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [filter56] "r" (filter56), [filter78] "r" (filter78), 6065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector_64] "r" (vector_64), [cm] "r" (cm), 6075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [src] "r" (src), [dst_pitch_2] "r" (dst_pitch_2) 6085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 6095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += 16; 6115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = (dst_ptr + ((c + 1) * 16 * dst_stride)); 6125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang odd_dst = (dst + dst_stride); 6135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 6165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr += src_stride; 6175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr += 1; 6195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 6205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 6215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic void convolve_horiz_64_transposed_dspr2(const uint8_t *src_ptr, 6235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t src_stride, 6245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst_ptr, 6255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t dst_stride, 6265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x0, 6275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t h) { 6285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t c, y; 6295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const uint8_t *src; 6305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst; 6315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *cm = vp9_ff_cropTbl; 6325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t vector_64 = 64; 6335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t filter12, filter34, filter56, filter78; 6345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t Temp1, Temp2, Temp3; 6355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t qload1, qload2; 6365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t p1, p2, p3, p4, p5; 6375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t st1, st2, st3; 6385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t dst_pitch_2 = (dst_stride << 1); 6395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *odd_dst; 6405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter12 = ((const int32_t *)filter_x0)[0]; 6425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter34 = ((const int32_t *)filter_x0)[1]; 6435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter56 = ((const int32_t *)filter_x0)[2]; 6445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter78 = ((const int32_t *)filter_x0)[3]; 6455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--;) { 6475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 6485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride); 6495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride + 32); 6505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src_ptr + src_stride + 64); 6515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src = src_ptr; 6535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = dst_ptr; 6545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang odd_dst = (dst + dst_stride); 6565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (c = 0; c < 4; c++) { 6585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 6595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 0(%[src]) \n\t" 6605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 4(%[src]) \n\t" 6615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 1. pixel */ 6635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ 6645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 6655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ 6665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 6675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 6685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 6695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 6705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 6715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 8(%[src]) \n\t" 6725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ 6735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ 6745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ 6755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ 6765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ 6775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 2. pixel */ 6795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ 6805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 6815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload2] \n\t" 6825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload2] \n\t" 6835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 12(%[src]) \n\t" 6845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ 6855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ 6865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ 6875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ 6885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ 6895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ 6905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 6915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 3. pixel */ 6925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ 6935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 6945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 6955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[dst]) \n\t" /* even 1 */ 6965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 6975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ 6985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ 6995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ 7005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ 7015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ 7025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ 7035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 4. pixel */ 7055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ 7065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 7075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 7085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* even 2 */ 7095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 7105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 16(%[src]) \n\t" 7115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ 7125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ 7135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ 7145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ 7155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ 7165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ 7175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 5. pixel */ 7195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ 7205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 7215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 7225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[dst]) \n\t" /* even 3 */ 7235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 7245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ 7255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ 7265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ 7275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ 7285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ 7295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ 7305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 6. pixel */ 7325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ 7335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 7345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 7355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[dst]) \n\t" /* even 4 */ 7365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 7375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 20(%[src]) \n\t" 7385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ 7395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ 7405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ 7415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ 7425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ 7435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ 7445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 7. pixel */ 7465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ 7475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 7485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload1] \n\t" 7495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* even 5 */ 7505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 7515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ 7525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ 7535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ 7545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ 7555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ 7565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ 7575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* even 8. pixel */ 7595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ 7605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 7615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ 7625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ 7635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[dst]) \n\t" /* even 6 */ 7645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 7655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ 7665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ 7675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ 7685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ 7695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* ODD pixels */ 7715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 1(%[src]) \n\t" 7725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 5(%[src]) \n\t" 7735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 1. pixel */ 7755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ 7765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 7775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload1] \n\t" 7785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p2], %[qload1] \n\t" 7795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p3], %[qload2] \n\t" 7805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p4], %[qload2] \n\t" 7815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[dst]) \n\t" /* even 7 */ 7825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[dst], %[dst], %[dst_pitch_2] \n\t" 7835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 9(%[src]) \n\t" 7845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ 7855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ 7865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ 7875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ 7885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ 7895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ 7905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 7915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 2. pixel */ 7925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ 7935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 7945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p1], %[qload2] \n\t" 7955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p5], %[qload2] \n\t" 7965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[dst]) \n\t" /* even 8 */ 7975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 13(%[src]) \n\t" 7985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ 7995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ 8005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ 8015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ 8025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ 8035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ 8045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 3. pixel */ 8065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ 8075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 8085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p2], %[qload1] \n\t" 8095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[odd_dst]) \n\t" /* odd 1 */ 8105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 8115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ 8125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ 8135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ 8145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ 8155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ 8165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ 8175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 4. pixel */ 8195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ 8205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 8215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p3], %[qload1] \n\t" 8225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[odd_dst]) \n\t" /* odd 2 */ 8235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 8245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload2], 17(%[src]) \n\t" 8255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ 8265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ 8275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ 8285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ 8295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ 8305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ 8315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 5. pixel */ 8335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ 8345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac2 \n\t" 8355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p4], %[qload2] \n\t" 8365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[odd_dst]) \n\t" /* odd 3 */ 8375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 8385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ 8395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ 8405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ 8415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ 8425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ 8435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ 8445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 6. pixel */ 8465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ 8475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac3 \n\t" 8485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbl %[p1], %[qload2] \n\t" 8495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[odd_dst]) \n\t" /* odd 4 */ 8505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 8515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[qload1], 21(%[src]) \n\t" 8525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ 8535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ 8545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ 8555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ 8565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ 8575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ 8585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 7. pixel */ 8605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ 8615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "mthi $zero, $ac1 \n\t" 8625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "preceu.ph.qbr %[p5], %[qload1] \n\t" 8635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[odd_dst]) \n\t" /* odd 5 */ 8645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 8655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ 8665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ 8675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ 8685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ 8695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ 8705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* odd 8. pixel */ 8725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ 8735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ 8745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ 8755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ 8765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ 8775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ 8795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ 8805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ 8815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st2], 0(%[odd_dst]) \n\t" /* odd 6 */ 8835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 8845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st3], 0(%[odd_dst]) \n\t" /* odd 7 */ 8865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" 8875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sb %[st1], 0(%[odd_dst]) \n\t" /* odd 8 */ 8895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 8905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [qload1] "=&r" (qload1), [qload2] "=&r" (qload2), [p5] "=&r" (p5), 8915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [st1] "=&r" (st1), [st2] "=&r" (st2), [st3] "=&r" (st3), 8925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4), 8935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 8945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [dst] "+r" (dst), [odd_dst] "+r" (odd_dst) 8955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [filter12] "r" (filter12), [filter34] "r" (filter34), 8965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [filter56] "r" (filter56), [filter78] "r" (filter78), 8975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [vector_64] "r" (vector_64), [cm] "r" (cm), 8985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [src] "r" (src), [dst_pitch_2] "r" (dst_pitch_2) 8995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 9005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += 16; 9025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst = (dst_ptr + ((c + 1) * 16 * dst_stride)); 9035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang odd_dst = (dst + dst_stride); 9045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 9055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* Next row... */ 9075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src_ptr += src_stride; 9085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst_ptr += 1; 9105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 9115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 9125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid convolve_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride, 9145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 9155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter, int w, int h) { 9165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int x, y, k; 9175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = 0; y < h; ++y) { 9195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < w; ++x) { 9205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int sum = 0; 9215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (k = 0; k < 8; ++k) 9235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang sum += src[x + k] * filter[k]; 9245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst[x * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); 9265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 9275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 9295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 1; 9305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 9315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 9325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid copy_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride, 9345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 9355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int w, int h) { 9365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int x, y; 9375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = 0; y < h; ++y) { 9395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < w; ++x) { 9405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst[x * dst_stride] = src[x]; 9415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 9425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 9445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += 1; 9455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 9465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 9475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, 9495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 9505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x, int x_step_q4, 9515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_y, int y_step_q4, 9525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int w, int h) { 9535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang DECLARE_ALIGNED_ARRAY(32, uint8_t, temp, 64 * 135); 9545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7; 9555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t pos = 38; 9565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* bit positon for extract from acc */ 9585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 9595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "wrdsp %[pos], 1 \n\t" 9605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : 9615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [pos] "r" (pos) 9625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 9635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (intermediate_height < h) 9655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang intermediate_height = h; 9665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (x_step_q4 != 16 || y_step_q4 != 16) 9685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang return vp9_convolve8_c(src, src_stride, 9695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 9705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 9715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 9725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 9735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if ((((const int32_t *)filter_x)[1] == 0x800000) 9755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang && (((const int32_t *)filter_y)[1] == 0x800000)) 9765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang return vp9_convolve_copy(src, src_stride, 9775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 9785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, x_step_q4, 9795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, y_step_q4, 9805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, h); 9815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* copy the src to dst */ 9835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (filter_x[3] == 0x80) { 9845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang copy_horiz_transposed(src - src_stride * 3, src_stride, 9855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp, intermediate_height, 9865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, intermediate_height); 9875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else if (((const int32_t *)filter_x)[0] == 0) { 9885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve2_dspr2(src - src_stride * 3, src_stride, 9895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp, intermediate_height, 9905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, 9915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang w, intermediate_height); 9925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 9935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src -= (src_stride * 3 + 3); 9945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 9965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src); 9975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + 32); 9985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 9995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang switch (w) { 10005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 4: 10015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_4_transposed_dspr2(src, src_stride, 10025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp, intermediate_height, 10035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, intermediate_height); 10045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 8: 10065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_8_transposed_dspr2(src, src_stride, 10075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp, intermediate_height, 10085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, intermediate_height); 10095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 16: 10115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 32: 10125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_16_transposed_dspr2(src, src_stride, 10135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp, intermediate_height, 10145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, intermediate_height, 10155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang (w/16)); 10165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 64: 10185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + 32); 10195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_64_transposed_dspr2(src, src_stride, 10205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp, intermediate_height, 10215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, intermediate_height); 10225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang default: 10245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_transposed(src, src_stride, 10255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang temp, intermediate_height, 10265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_x, w, intermediate_height); 10275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 10295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 10305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 10315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* copy the src to dst */ 10325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang if (filter_y[3] == 0x80) { 10335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang copy_horiz_transposed(temp + 3, intermediate_height, 10345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang h, w); 10365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else if (((const int32_t *)filter_y)[0] == 0) { 10375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_convolve2_dspr2(temp + 3, intermediate_height, 10385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, 10405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang h, w); 10415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } else { 10425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang switch (h) { 10435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 4: 10445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_4_transposed_dspr2(temp, intermediate_height, 10455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, w); 10475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 8: 10495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_8_transposed_dspr2(temp, intermediate_height, 10505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, w); 10525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 16: 10545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 32: 10555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_16_transposed_dspr2(temp, intermediate_height, 10565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, w, (h/16)); 10585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 64: 10605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_64_transposed_dspr2(temp, intermediate_height, 10615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, w); 10635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang default: 10655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang convolve_horiz_transposed(temp, intermediate_height, 10665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst, dst_stride, 10675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang filter_y, h, w); 10685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 10695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 10705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 10715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 10725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 10735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangvoid vp9_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride, 10745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint8_t *dst, ptrdiff_t dst_stride, 10755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_x, int filter_x_stride, 10765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang const int16_t *filter_y, int filter_y_stride, 10775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int w, int h) { 10785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang int x, y; 10795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 10805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* prefetch data to cache memory */ 10815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src); 10825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + 32); 10835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst); 10845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 10855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang switch (w) { 10865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 4: 10875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { 10885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1; 10895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 10905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 1 word storage */ 10915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 10925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 10935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 10945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 10955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 10965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 10975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], (%[src]) \n\t" 10985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp1], (%[dst]) \n\t" /* store */ 10995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1) 11015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 11025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 11035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 11055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 11065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 11075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 11085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 11095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 8: 11105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { 11115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2; 11125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 2 word storage */ 11145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 11155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 11165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 11175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 11185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 11205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 11215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 11225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp1], 0(%[dst]) \n\t" /* store */ 11235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp2], 4(%[dst]) \n\t" /* store */ 11245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2) 11265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 11275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 11285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 11305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 11315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 11325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 11335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 11345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 16: 11355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { 11365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2, tp3, tp4; 11375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 4 word storage */ 11395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 11405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 11415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 11425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 11435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 11455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 11465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 11475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 8(%[src]) \n\t" 11485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[src]) \n\t" 11495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp1], 0(%[dst]) \n\t" /* store */ 11515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp2], 4(%[dst]) \n\t" /* store */ 11525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp3], 8(%[dst]) \n\t" /* store */ 11535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp4], 12(%[dst]) \n\t" /* store */ 11545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 11565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp3] "=&r" (tp3), [tp4] "=&r" (tp4) 11575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 11585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 11595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 11615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 11625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 11635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 11645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 11655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 32: 11665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { 11675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2, tp3, tp4; 11685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp5, tp6, tp7, tp8; 11695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 8 word storage */ 11715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 11725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 11735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 11745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 11755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 11775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 11785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 11795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 8(%[src]) \n\t" 11805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[src]) \n\t" 11815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp5], 16(%[src]) \n\t" 11825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp6], 20(%[src]) \n\t" 11835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp7], 24(%[src]) \n\t" 11845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp8], 28(%[src]) \n\t" 11855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp1], 0(%[dst]) \n\t" /* store */ 11875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp2], 4(%[dst]) \n\t" /* store */ 11885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp3], 8(%[dst]) \n\t" /* store */ 11895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp4], 12(%[dst]) \n\t" /* store */ 11905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp5], 16(%[dst]) \n\t" /* store */ 11915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp6], 20(%[dst]) \n\t" /* store */ 11925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp7], 24(%[dst]) \n\t" /* store */ 11935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp8], 28(%[dst]) \n\t" /* store */ 11945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 11955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 11965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 11975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp5] "=&r" (tp5), [tp6] "=&r" (tp6), 11985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp7] "=&r" (tp7), [tp8] "=&r" (tp8) 11995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 12005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 12015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 12035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 12045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 12075ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang case 64: 12085ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang { 12095ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp1, tp2, tp3, tp4; 12105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang uint32_t tp5, tp6, tp7, tp8; 12115ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12125ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + 64); 12135ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + 32); 12145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12155ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang /* 16 word storage */ 12165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 12175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride); 12185ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 32); 12195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_load(src + src_stride + 64); 12205ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride); 12215ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang vp9_prefetch_store(dst + dst_stride + 32); 12225ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12235ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang __asm__ __volatile__ ( 12245ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 0(%[src]) \n\t" 12255ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 4(%[src]) \n\t" 12265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 8(%[src]) \n\t" 12275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 12(%[src]) \n\t" 12285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp5], 16(%[src]) \n\t" 12295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp6], 20(%[src]) \n\t" 12305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp7], 24(%[src]) \n\t" 12315ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp8], 28(%[src]) \n\t" 12325ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp1], 0(%[dst]) \n\t" /* store */ 12345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp2], 4(%[dst]) \n\t" /* store */ 12355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp3], 8(%[dst]) \n\t" /* store */ 12365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp4], 12(%[dst]) \n\t" /* store */ 12375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp5], 16(%[dst]) \n\t" /* store */ 12385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp6], 20(%[dst]) \n\t" /* store */ 12395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp7], 24(%[dst]) \n\t" /* store */ 12405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp8], 28(%[dst]) \n\t" /* store */ 12415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp1], 32(%[src]) \n\t" 12435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp2], 36(%[src]) \n\t" 12445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp3], 40(%[src]) \n\t" 12455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp4], 44(%[src]) \n\t" 12465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp5], 48(%[src]) \n\t" 12475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp6], 52(%[src]) \n\t" 12485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp7], 56(%[src]) \n\t" 12495ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "ulw %[tp8], 60(%[src]) \n\t" 12505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp1], 32(%[dst]) \n\t" /* store */ 12525ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp2], 36(%[dst]) \n\t" /* store */ 12535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp3], 40(%[dst]) \n\t" /* store */ 12545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp4], 44(%[dst]) \n\t" /* store */ 12555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp5], 48(%[dst]) \n\t" /* store */ 12565ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp6], 52(%[dst]) \n\t" /* store */ 12575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp7], 56(%[dst]) \n\t" /* store */ 12585ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang "sw %[tp8], 60(%[dst]) \n\t" /* store */ 12595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 12615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp3] "=&r" (tp3), [tp4] "=&r" (tp4), 12625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp5] "=&r" (tp5), [tp6] "=&r" (tp6), 12635ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang [tp7] "=&r" (tp7), [tp8] "=&r" (tp8) 12645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang : [src] "r" (src), [dst] "r" (dst) 12655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang ); 12665ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12675ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 12685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 12695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 12725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang default: 12735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (y = h; y--; ) { 12745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang for (x = 0; x < w; ++x) { 12755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst[x] = src[x]; 12765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang 12785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang src += src_stride; 12795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang dst += dst_stride; 12805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang break; 12825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang } 12835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang} 12845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang#endif 1285