12f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan/* 21b362b15af34006e6a11974088a46d42b903418eJohann * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 32f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * 42f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * Use of this source code is governed by a BSD-style license 52f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * that can be found in the LICENSE file in the root of the source 62f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * tree. An additional intellectual property rights grant can be found 72f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * in the file PATENTS. All contributing project authors may 82f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * be found in the AUTHORS file in the root of the source tree. 92f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan */ 102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 121b362b15af34006e6a11974088a46d42b903418eJohann#include <stdlib.h> 13ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp8_rtcd.h" 141b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_ports/mem.h" 152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 161b362b15af34006e6a11974088a46d42b903418eJohann#if HAVE_DSPR2 172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan#define CROP_WIDTH 256 181b362b15af34006e6a11974088a46d42b903418eJohannunsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; 192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanstatic const unsigned short sub_pel_filterss[8][3] = 212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0, 0}, 232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0x0601, 0x7b0c}, 242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0201, 0x0b08, 0x6c24}, 252f01f9a5c363613e7389fb28c250edcd4509f815Dragan 
Mrdjan { 0, 0x0906, 0x5d32}, 262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0303, 0x1010, 0x4d4d}, 272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0x0609, 0x325d}, 282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0102, 0x080b, 0x246c}, 292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0x0106, 0x0c7b}, 302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}; 312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 321b362b15af34006e6a11974088a46d42b903418eJohann 332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanstatic const int sub_pel_filters_int[8][3] = 342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0, 0}, 362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0000fffa, 0x007b000c, 0xffff0000}, 372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0002fff5, 0x006c0024, 0xfff80001}, 382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0000fff7, 0x005d0032, 0xfffa0000}, 392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0003fff0, 0x004d004d, 0xfff00003}, 402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0000fffa, 0x0032005d, 0xfff70000}, 412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0001fff8, 0x0024006c, 0xfff50002}, 422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0000ffff, 0x000c007b, 0xfffa0000}, 432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}; 442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 451b362b15af34006e6a11974088a46d42b903418eJohann 462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanstatic const int sub_pel_filters_inv[8][3] = 472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0, 0}, 492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfffa0000, 0x000c007b, 0x0000ffff}, 502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfff50002, 0x0024006c, 0x0001fff8}, 
512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfff70000, 0x0032005d, 0x0000fffa}, 522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfff00003, 0x004d004d, 0x0003fff0}, 532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfffa0000, 0x005d0032, 0x0000fff7}, 542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfff80001, 0x006c0024, 0x0002fff5}, 552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xffff0000, 0x007b000c, 0x0000fffa}, 562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}; 572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 581b362b15af34006e6a11974088a46d42b903418eJohann 592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanstatic const int sub_pel_filters_int_tap_4[8][2] = 602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0}, 622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfffa007b, 0x000cffff}, 632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0}, 642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfff7005d, 0x0032fffa}, 652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0}, 662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xfffa0032, 0x005dfff7}, 672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0}, 682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0xffff000c, 0x007bfffa}, 692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}; 702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 711b362b15af34006e6a11974088a46d42b903418eJohann 722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanstatic const int sub_pel_filters_inv_tap_4[8][2] = 732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0}, 752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x007bfffa, 0xffff000c}, 762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0}, 772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 
0x005dfff7, 0xfffa0032}, 782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0}, 792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x0032fffa, 0xfff7005d}, 802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0, 0}, 812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 0x000cffff, 0xfffa007b}, 822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}; 832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 841b362b15af34006e6a11974088a46d42b903418eJohanninline void prefetch_load(unsigned char *src) 851b362b15af34006e6a11974088a46d42b903418eJohann{ 862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "pref 0, 0(%[src]) \n\t" 882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src] "r" (src) 902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 941b362b15af34006e6a11974088a46d42b903418eJohanninline void prefetch_store(unsigned char *dst) 951b362b15af34006e6a11974088a46d42b903418eJohann{ 962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "pref 1, 0(%[dst]) \n\t" 982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [dst] "r" (dst) 1002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 1012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 1022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid dsputil_static_init(void) 1042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 1052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int i; 1062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
1071b362b15af34006e6a11974088a46d42b903418eJohann for (i = 0; i < 256; i++) ff_cropTbl[i + CROP_WIDTH] = i; 1081b362b15af34006e6a11974088a46d42b903418eJohann 1091b362b15af34006e6a11974088a46d42b903418eJohann for (i = 0; i < CROP_WIDTH; i++) 1101b362b15af34006e6a11974088a46d42b903418eJohann { 1112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ff_cropTbl[i] = 0; 1122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ff_cropTbl[i + CROP_WIDTH + 256] = 255; 1132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 1142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 1152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass_4 1172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 1181b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 1191b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst_ptr, 1202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int src_pixels_per_line, 1212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int output_height, 1222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int xoffset, 1232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int pitch 1242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 1252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 1262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int i; 1272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int Temp1, Temp2, Temp3, Temp4; 1282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector4a = 64; 1302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int vector1b, vector2b, vector3b; 1312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int tp1, tp2, tn1, tn2; 1322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int p1, p2, p3; 
1332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int n1, n2, n3; 1342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char *cm = ff_cropTbl + CROP_WIDTH; 1352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector3b = sub_pel_filters_inv[xoffset][2]; 1372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1381b362b15af34006e6a11974088a46d42b903418eJohann /* if (xoffset == 0) we don't need any filtering */ 1391b362b15af34006e6a11974088a46d42b903418eJohann if (vector3b == 0) 1401b362b15af34006e6a11974088a46d42b903418eJohann { 1412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = 0; i < output_height; i++) 1422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 1432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 1442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr + src_pixels_per_line); 1452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[0] = src_ptr[0]; 1462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[1] = src_ptr[1]; 1472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[2] = src_ptr[2]; 1482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[3] = src_ptr[3]; 1492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next row... 
*/ 1511b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line; 1522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr += 4; 1532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 1542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 1551b362b15af34006e6a11974088a46d42b903418eJohann else 1561b362b15af34006e6a11974088a46d42b903418eJohann { 1571b362b15af34006e6a11974088a46d42b903418eJohann if (vector3b > 65536) 1581b362b15af34006e6a11974088a46d42b903418eJohann { 1592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 6 tap filter */ 1602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = sub_pel_filters_inv[xoffset][0]; 1622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = sub_pel_filters_inv[xoffset][1]; 1632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 1652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr + src_pixels_per_line); 1662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 1682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 1692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 1702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 1712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], -2(%[src_ptr]) \n\t" 1722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp2], 2(%[src_ptr]) \n\t" 1732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 1. 
pixel */ 1752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 1762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p1], %[tp1] \n\t" 1772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p2], %[tp1] \n\t" 1782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p3], %[tp2] \n\t" 1792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 1802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 1812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 1822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 2. pixel */ 1842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 1852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p1], %[tp2] \n\t" 1862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "balign %[tp2], %[tp1], 3 \n\t" 1872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 1882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 1892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 1902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 1912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 1. 
pixel */ 1932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn2], 3(%[src_ptr]) \n\t" 1942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 1952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n1], %[tp2] \n\t" 1962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n2], %[tp2] \n\t" 1972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n3], %[tn2] \n\t" 1982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 1992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 2002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 2012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" 2022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 2. pixel */ 2042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 2052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n1], %[tn2] \n\t" 2062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 2072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 2082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 2092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 2102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 2112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp */ 2132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tp1], %[Temp1](%[cm]) \n\t" 2142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tn1], %[Temp2](%[cm]) \n\t" 2152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux 
%[tp2], %[Temp3](%[cm]) \n\t" 2162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[n2], %[Temp4](%[cm]) \n\t" 2172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* store bytes */ 2192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp1], 0(%[dst_ptr]) \n\t" 2202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tn1], 1(%[dst_ptr]) \n\t" 2212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp2], 2(%[dst_ptr]) \n\t" 2222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[n2], 3(%[dst_ptr]) \n\t" 2232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), 2252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), 2262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p3] "=&r" (p3), [n1] "=&r" (n1), [n2] "=&r" (n2), 2272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n3] "=&r" (n3), [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 2282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) 2292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 2302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), 2312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector3b] "r" (vector3b), [src_ptr] "r" (src_ptr) 2322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 2332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* Next row... 
*/ 2351b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line; 2362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr += pitch; 2372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 2382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 2391b362b15af34006e6a11974088a46d42b903418eJohann else 2401b362b15af34006e6a11974088a46d42b903418eJohann { 2412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 4 tap filter */ 2422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; 2442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; 2452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 2472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 2482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 2492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 2502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], -1(%[src_ptr]) \n\t" 2512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp2], 3(%[src_ptr]) \n\t" 2522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 1. 
pixel */ 2542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 2552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p1], %[tp1] \n\t" 2562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p2], %[tp1] \n\t" 2572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p3], %[tp2] \n\t" 2582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 2592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 2602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 2. pixel */ 2622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 2632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 2642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 2652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 2662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 1. 
pixel */ 2682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "srl %[tn1], %[tp2], 8 \n\t" 2692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "balign %[tp2], %[tp1], 3 \n\t" 2702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 2712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n1], %[tp2] \n\t" 2722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n2], %[tp2] \n\t" 2732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n3], %[tn1] \n\t" 2742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 2752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 2762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 2772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 2. pixel */ 2792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 2802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 2812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 2822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 2832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 2842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 2862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tp1], %[Temp1](%[cm]) \n\t" 2872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tn1], %[Temp2](%[cm]) \n\t" 2882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tp2], %[Temp3](%[cm]) \n\t" 2892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp1], 0(%[dst_ptr]) \n\t" 2902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb 
%[tn1], 1(%[dst_ptr]) \n\t" 2912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[n2], %[Temp4](%[cm]) \n\t" 2922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp2], 2(%[dst_ptr]) \n\t" 2932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[n2], 3(%[dst_ptr]) \n\t" 2942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 2952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), 2962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), 2972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), 2982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 2992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) 3002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 3012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), 3022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 3032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 3042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* Next row... 
*/ 3051b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line; 3062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr += pitch; 3072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 3082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 3092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 3102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 3112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass_8_all 3132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 3141b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 3151b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst_ptr, 3162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int src_pixels_per_line, 3172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int output_height, 3182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int xoffset, 3192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int pitch 3202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 3212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 3222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int i; 3232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int Temp1, Temp2, Temp3, Temp4; 3242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3251b362b15af34006e6a11974088a46d42b903418eJohann unsigned int vector4a = 64; 3262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector1b, vector2b, vector3b; 3272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int tp1, tp2, tn1, tn2; 3282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int p1, p2, p3, p4; 3292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int n1, n2, n3, n4; 3302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char *cm = 
ff_cropTbl + CROP_WIDTH; 3322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3331b362b15af34006e6a11974088a46d42b903418eJohann /* if (xoffset == 0) we don't need any filtering */ 3341b362b15af34006e6a11974088a46d42b903418eJohann if (xoffset == 0) 3351b362b15af34006e6a11974088a46d42b903418eJohann { 3362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = 0; i < output_height; i++) 3372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 3382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 3392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr + src_pixels_per_line); 3402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[0] = src_ptr[0]; 3422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[1] = src_ptr[1]; 3432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[2] = src_ptr[2]; 3442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[3] = src_ptr[3]; 3452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[4] = src_ptr[4]; 3462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[5] = src_ptr[5]; 3472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[6] = src_ptr[6]; 3482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[7] = src_ptr[7]; 3492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next row... 
*/ 3511b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line; 3522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr += 8; 3532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 3542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 3551b362b15af34006e6a11974088a46d42b903418eJohann else 3561b362b15af34006e6a11974088a46d42b903418eJohann { 3572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector3b = sub_pel_filters_inv[xoffset][2]; 3582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3591b362b15af34006e6a11974088a46d42b903418eJohann if (vector3b > 65536) 3601b362b15af34006e6a11974088a46d42b903418eJohann { 3612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 6 tap filter */ 3622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = sub_pel_filters_inv[xoffset][0]; 3642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = sub_pel_filters_inv[xoffset][1]; 3652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 3672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 3682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 3692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr + src_pixels_per_line); 3702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 3722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 3732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], -2(%[src_ptr]) \n\t" 3742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp2], 2(%[src_ptr]) \n\t" 3752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 1. 
pixel */ 3772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 3782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p1], %[tp1] \n\t" 3792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p2], %[tp1] \n\t" 3802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p3], %[tp2] \n\t" 3812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 3822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 3832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 3842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 2. pixel */ 3862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 3872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p1], %[tp2] \n\t" 3882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 3892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 3902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 3912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "balign %[tp2], %[tp1], 3 \n\t" 3932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 3942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn2], 3(%[src_ptr]) \n\t" 3952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 3962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 1. 
pixel */ 3972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 3982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n1], %[tp2] \n\t" 3992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n2], %[tp2] \n\t" 4002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n3], %[tn2] \n\t" 4012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 4022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 4032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 4042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" 4052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 2. pixel */ 4072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 4082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n1], %[tn2] \n\t" 4092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 4102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 4112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 4122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], 6(%[src_ptr]) \n\t" 4132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 4142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 4152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p2], %[tp1] \n\t" 4162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 4172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), 
4192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), 4202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), 4212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 4222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) 4232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 4242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), 4252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 4262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 4272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 4292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[0] = cm[Temp1]; 4302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[1] = cm[Temp2]; 4312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[2] = cm[Temp3]; 4322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[3] = cm[Temp4]; 4332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next 4 pixels */ 4352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 4362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 3. pixel */ 4372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" 4382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" 4392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" 4402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 4. 
pixel */ 4422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 4432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p4], %[tp1] \n\t" 4442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" 4452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 4462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 4472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn1], 7(%[src_ptr]) \n\t" 4492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 4502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 3. pixel */ 4522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 4532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n2], %[tn1] \n\t" 4542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" 4552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" 4562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" 4572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 4582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 4. 
pixel */ 4602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 4612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n4], %[tn1] \n\t" 4622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" 4632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" 4642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" 4652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 4662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 4672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tn1] "=&r" (tn1), [n2] "=&r" (n2), 4692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p4] "=&r" (p4), [n4] "=&r" (n4), 4702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 4712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) 4722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "r" (tp1), [vector1b] "r" (vector1b), [p2] "r" (p2), 4732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector2b] "r" (vector2b), [n1] "r" (n1), [p1] "r" (p1), 4742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), 4752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) 4762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 4772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 4792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[4] = cm[Temp1]; 4802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[5] = cm[Temp2]; 4812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[6] = 
cm[Temp3]; 4822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[7] = cm[Temp4]; 4832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4841b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line; 4852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr += pitch; 4862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 4872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 4881b362b15af34006e6a11974088a46d42b903418eJohann else 4891b362b15af34006e6a11974088a46d42b903418eJohann { 4902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 4 tap filter */ 4912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; 4932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; 4942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 4962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 4972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 4982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr + src_pixels_per_line); 4992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 5012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 5022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], -1(%[src_ptr]) \n\t" 5032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 1. 
pixel */ 5052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 5062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p1], %[tp1] \n\t" 5072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p2], %[tp1] \n\t" 5082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 5092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 5102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp2], 3(%[src_ptr]) \n\t" 5122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 2. pixel */ 5142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 5152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p3], %[tp2] \n\t" 5162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p4], %[tp2] \n\t" 5172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 5182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 5192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 5202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "balign %[tp2], %[tp1], 3 \n\t" 5222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 1. 
pixel */ 5242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 5252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n1], %[tp2] \n\t" 5262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n2], %[tp2] \n\t" 5272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 5282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 5292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 5302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn2], 4(%[src_ptr]) \n\t" 5322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 2. pixel */ 5342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 5352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n3], %[tn2] \n\t" 5362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n4], %[tn2] \n\t" 5372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 5382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 5392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], 7(%[src_ptr]) \n\t" 5402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 5412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 5422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 5432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), 5452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2), 5462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p3] "=&r" (p3), 
[p4] "=&r" (p4), [n1] "=&r" (n1), 5472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4), 5482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 5492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) 5502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 5512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) 5522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 5532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 5552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[0] = cm[Temp1]; 5562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[1] = cm[Temp2]; 5572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[2] = cm[Temp3]; 5582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[3] = cm[Temp4]; 5592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next 4 pixels */ 5612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 5622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 3. pixel */ 5632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" 5642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" 5652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 4. 
pixel */ 5672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 5682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p2], %[tp1] \n\t" 5692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" 5702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 5712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 5722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 3. pixel */ 5742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 5752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" 5762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" 5772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn1], 8(%[src_ptr]) \n\t" 5782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 5792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 4. 
pixel */ 5812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 5822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n2], %[tn1] \n\t" 5832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" 5842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" 5852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 5862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 5872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), 5892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 5902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) 5912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "r" (tp1), [p3] "r" (p3), [p4] "r" (p4), 5922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 5932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr), 5942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n3] "r" (n3), [n4] "r" (n4) 5952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 5962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 5972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 5982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[4] = cm[Temp1]; 5992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[5] = cm[Temp2]; 6002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[6] = cm[Temp3]; 6012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[7] = cm[Temp4]; 6022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next row... 
*/ 6041b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line; 6052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr += pitch; 6062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 6072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 6082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 6092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 6102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass16_6tap 6132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 6141b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 6151b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst_ptr, 6162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int src_pixels_per_line, 6172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int output_height, 6182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int xoffset, 6192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int pitch 6202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 6212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 6222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int i; 6232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int Temp1, Temp2, Temp3, Temp4; 6242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector4a; 6262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector1b, vector2b, vector3b; 6272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int tp1, tp2, tn1, tn2; 6282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int p1, p2, p3, p4; 6292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int n1, n2, n3, n4; 6302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char *cm = 
ff_cropTbl + CROP_WIDTH; 6312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = sub_pel_filters_inv[xoffset][0]; 6332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = sub_pel_filters_inv[xoffset][1]; 6342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector3b = sub_pel_filters_inv[xoffset][2]; 6352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector4a = 64; 6362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 6382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 6392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 6402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr + src_pixels_per_line); 6412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 6432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 6442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], -2(%[src_ptr]) \n\t" 6452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp2], 2(%[src_ptr]) \n\t" 6462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 1. 
pixel */ 6482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 6492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p1], %[tp1] \n\t" 6502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p2], %[tp1] \n\t" 6512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p3], %[tp2] \n\t" 6522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 6532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 6542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" 6552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 2. pixel */ 6572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 6582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p1], %[tp2] \n\t" 6592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 6602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 6612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" 6622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "balign %[tp2], %[tp1], 3 \n\t" 6642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn2], 3(%[src_ptr]) \n\t" 6652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 6662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 1. 
pixel */ 6682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 6692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n1], %[tp2] \n\t" 6702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n2], %[tp2] \n\t" 6712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n3], %[tn2] \n\t" 6722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 6732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 6742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 6752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" 6762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 2. pixel */ 6782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 6792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n1], %[tn2] \n\t" 6802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 6812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 6822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" 6832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], 6(%[src_ptr]) \n\t" 6842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 6852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 6862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p2], %[tp1] \n\t" 6872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 6882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2), 
6902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), 6912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), 6922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 6932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) 6942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 6952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), 6962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 6972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 6982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 6992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 7002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[0] = cm[Temp1]; 7012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[1] = cm[Temp2]; 7022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[2] = cm[Temp3]; 7032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[3] = cm[Temp4]; 7042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next 4 pixels */ 7062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 7072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 3. pixel */ 7082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" 7092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" 7102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" 7112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 4. 
pixel */ 7132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 7142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p4], %[tp1] \n\t" 7152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" 7162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" 7172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" 7182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn1], 7(%[src_ptr]) \n\t" 7192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 7202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 3. pixel */ 7222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 7232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n2], %[tn1] \n\t" 7242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" 7252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" 7262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" 7272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 7282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 4. 
pixel */ 7302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 7312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n4], %[tn1] \n\t" 7322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" 7332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" 7342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" 7352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp2], 10(%[src_ptr]) \n\t" 7362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 7372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 7382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p1], %[tp2] \n\t" 7392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 7402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tn1] "=&r" (tn1), [tp2] "=&r" (tp2), [n2] "=&r" (n2), 7422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p4] "=&r" (p4), [n4] "=&r" (n4), 7432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 7442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) 7452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 7462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [tp1] "r" (tp1), [n1] "r" (n1), [p1] "r" (p1), 7472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [p2] "r" (p2), [vector3b] "r" (vector3b), 7482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr) 7492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 7502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
7512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 7522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[4] = cm[Temp1]; 7532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[5] = cm[Temp2]; 7542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[6] = cm[Temp3]; 7552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[7] = cm[Temp4]; 7562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next 4 pixels */ 7582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 7592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 5. pixel */ 7602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector1b] \n\t" 7612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" 7622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" 7632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 6. pixel */ 7652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 7662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p3], %[tp2] \n\t" 7672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" 7682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" 7692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector3b] \n\t" 7702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn1], 11(%[src_ptr]) \n\t" 7722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 7732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 5. 
pixel */ 7752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 7762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n1], %[tn1] \n\t" 7772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector1b] \n\t" 7782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" 7792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" 7802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 7812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 6. pixel */ 7832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 7842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n3], %[tn1] \n\t" 7852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" 7862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n1], %[vector2b] \n\t" 7872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector3b] \n\t" 7882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], 14(%[src_ptr]) \n\t" 7892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 7902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 7912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p4], %[tp1] \n\t" 7922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 7932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 7942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1), 7952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n1] "=&r" (n1), [p3] "=&r" (p3), [n3] "=&r" (n3), 7962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 
7972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) 7982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 7992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [tp2] "r" (tp2), [p2] "r" (p2), [n2] "r" (n2), 8002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p4] "r" (p4), [n4] "r" (n4), [p1] "r" (p1), [src_ptr] "r" (src_ptr), 8012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [vector3b] "r" (vector3b) 8022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 8032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 8052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[8] = cm[Temp1]; 8062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[9] = cm[Temp2]; 8072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[10] = cm[Temp3]; 8082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr[11] = cm[Temp4]; 8092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next 4 pixels */ 8112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 8122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 7. pixel */ 8132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 8142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p3], %[vector2b] \n\t" 8152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p4], %[vector3b] \n\t" 8162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 8. 
pixel */ 8182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 8192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p2], %[tp1] \n\t" 8202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector1b] \n\t" 8212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p4], %[vector2b] \n\t" 8222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector3b] \n\t" 8231b362b15af34006e6a11974088a46d42b903418eJohann "ulw %[tn1], 15(%[src_ptr]) \n\t" 8242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac3, 9 \n\t" 8252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 7. pixel */ 8272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 8282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n4], %[tn1] \n\t" 8292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 8302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n3], %[vector2b] \n\t" 8312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n4], %[vector3b] \n\t" 8322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac2, 9 \n\t" 8332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 8. 
pixel */ 8352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 8362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n2], %[tn1] \n\t" 8372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector1b] \n\t" 8382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n4], %[vector2b] \n\t" 8392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector3b] \n\t" 8402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 8412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac2, 9 \n\t" 8422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 8442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tp1], %[Temp1](%[cm]) \n\t" 8452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tn1], %[Temp2](%[cm]) \n\t" 8462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[p2], %[Temp3](%[cm]) \n\t" 8472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp1], 12(%[dst_ptr]) \n\t" 8482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tn1], 13(%[dst_ptr]) \n\t" 8492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[n2], %[Temp4](%[cm]) \n\t" 8502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[p2], 14(%[dst_ptr]) \n\t" 8512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[n2], 15(%[dst_ptr]) \n\t" 8522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), [n4] "=&r" (n4), 8542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 8552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4) 8562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" 
(vector1b), [vector2b] "r" (vector2b), 8572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [tp1] "r" (tp1), [p4] "r" (p4), [n1] "r" (n1), [p1] "r" (p1), 8582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), [p3] "r" (p3), 8592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n3] "r" (n3), [src_ptr] "r" (src_ptr), 8602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [cm] "r" (cm), [dst_ptr] "r" (dst_ptr) 8612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 8622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8631b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line; 8642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr += pitch; 8652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 8662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 8672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass16_0 8702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 8711b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 8721b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT output_ptr, 8732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int src_pixels_per_line 8742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 8752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 8762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int Temp1, Temp2, Temp3, Temp4; 8772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int i; 8782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 8802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_store(output_ptr + 32); 8812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
8822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* copy memory from src buffer to dst buffer */ 8832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = 0; i < 7; i++) 8842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 8852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 8862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 8872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 8882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp3], 8(%[src_ptr]) \n\t" 8892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp4], 12(%[src_ptr]) \n\t" 8902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 0(%[output_ptr]) \n\t" 8912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 4(%[output_ptr]) \n\t" 8922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp3], 8(%[output_ptr]) \n\t" 8932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp4], 12(%[output_ptr]) \n\t" 8942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 8952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 8972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) 8982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src_pixels_per_line] "r" (src_pixels_per_line), 8992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [output_ptr] "r" (output_ptr) 9002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 9012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 9032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 
9042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 9052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp3], 8(%[src_ptr]) \n\t" 9062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp4], 12(%[src_ptr]) \n\t" 9072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 16(%[output_ptr]) \n\t" 9082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 20(%[output_ptr]) \n\t" 9092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp3], 24(%[output_ptr]) \n\t" 9102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp4], 28(%[output_ptr]) \n\t" 9112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 9122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 9142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) 9152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src_pixels_per_line] "r" (src_pixels_per_line), 9162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [output_ptr] "r" (output_ptr) 9172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 9182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 9202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 9212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 9222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp3], 8(%[src_ptr]) \n\t" 9232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp4], 12(%[src_ptr]) \n\t" 9242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 32(%[output_ptr]) \n\t" 9252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 
36(%[output_ptr]) \n\t" 9262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp3], 40(%[output_ptr]) \n\t" 9272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp4], 44(%[output_ptr]) \n\t" 9282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 9292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 9312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr) 9322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src_pixels_per_line] "r" (src_pixels_per_line), 9332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [output_ptr] "r" (output_ptr) 9342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 9352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += 48; 9372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 9382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 9392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass16_4tap 9422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 9431b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 9441b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT output_ptr, 9452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int src_pixels_per_line, 9462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int output_width, 9472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int output_height, 9482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int xoffset, 9492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int yoffset, 
9501b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst_ptr, 9512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int pitch 9522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 9532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 9542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int i, j; 9552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int Temp1, Temp2, Temp3, Temp4; 9562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector4a; 9582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int vector1b, vector2b; 9592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int tp1, tp2, tp3, tn1; 9602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int p1, p2, p3; 9612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int n1, n2, n3; 9622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char *cm = ff_cropTbl + CROP_WIDTH; 9632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector4a = 64; 9652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; 9672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; 9682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* if (yoffset == 0) don't need temp buffer, data will be stored in dst_ptr */ 9701b362b15af34006e6a11974088a46d42b903418eJohann if (yoffset == 0) 9711b362b15af34006e6a11974088a46d42b903418eJohann { 9722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_height -= 5; 9732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr += (src_pixels_per_line + src_pixels_per_line); 9742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
9752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 9762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 9772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 9782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp3], -1(%[src_ptr]) \n\t" 9792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp3] "=&r" (tp3) 9802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src_ptr] "r" (src_ptr) 9812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 9822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* processing 4 adjacent pixels */ 9842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (j = 0; j < 16; j += 4) 9852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 9862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 9872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 9882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp2], 3(%[src_ptr]) \n\t" 9892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "move %[tp1], %[tp3] \n\t" 9902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 1. 
pixel */ 9922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 9932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mthi $0, $ac3 \n\t" 9942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "move %[tp3], %[tp2] \n\t" 9952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p1], %[tp1] \n\t" 9962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p2], %[tp1] \n\t" 9972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p3], %[tp2] \n\t" 9982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 9992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 10002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 2. pixel */ 10022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 10032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mthi $0, $ac2 \n\t" 10042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 10052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 10062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extr.w %[Temp1], $ac3, 7 \n\t" 10072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 1. 
pixel */ 10092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn1], 4(%[src_ptr]) \n\t" 10102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "balign %[tp2], %[tp1], 3 \n\t" 10112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 10122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mthi $0, $ac3 \n\t" 10132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n1], %[tp2] \n\t" 10142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n2], %[tp2] \n\t" 10152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n3], %[tn1] \n\t" 10162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extr.w %[Temp3], $ac2, 7 \n\t" 10172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 10182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 10192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 2. 
pixel */ 10212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 10222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mthi $0, $ac2 \n\t" 10232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extr.w %[Temp2], $ac3, 7 \n\t" 10242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 10252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 10262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extr.w %[Temp4], $ac2, 7 \n\t" 10272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 10292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tp1], %[Temp1](%[cm]) \n\t" 10302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tn1], %[Temp2](%[cm]) \n\t" 10312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tp2], %[Temp3](%[cm]) \n\t" 10322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp1], 0(%[dst_ptr]) \n\t" 10332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tn1], 1(%[dst_ptr]) \n\t" 10342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[n2], %[Temp4](%[cm]) \n\t" 10352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp2], 2(%[dst_ptr]) \n\t" 10362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[n2], 3(%[dst_ptr]) \n\t" 10372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tp3] "=&r" (tp3), 10392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [tn1] "=&r" (tn1), [p1] "=&r" (p1), [p2] "=&r" (p2), 10402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), 10412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [p3] "=&r" (p3), 
10422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) 10432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 10442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr), 10452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 10462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 10472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10481b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += 4; 10492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 10502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* Next row... */ 10521b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line - 16; 10532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst_ptr += pitch; 10542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 10552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 10561b362b15af34006e6a11974088a46d42b903418eJohann else 10571b362b15af34006e6a11974088a46d42b903418eJohann { 10582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 10592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 10602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* processing 4 adjacent pixels */ 10612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (j = 0; j < 16; j += 4) 10622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 10632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 10642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 10652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp1], -1(%[src_ptr]) \n\t" 10662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tp2], 3(%[src_ptr]) \n\t" 10672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
10682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 1. pixel */ 10692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 10702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mthi $0, $ac3 \n\t" 10712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p1], %[tp1] \n\t" 10722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[p2], %[tp1] \n\t" 10732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[p3], %[tp2] \n\t" 10742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" 10752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" 10762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* even 2. pixel */ 10782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 10792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mthi $0, $ac2 \n\t" 10802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" 10812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" 10822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extr.w %[Temp1], $ac3, 7 \n\t" 10832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 1. 
pixel */ 10852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[tn1], 4(%[src_ptr]) \n\t" 10862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "balign %[tp2], %[tp1], 3 \n\t" 10872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 10882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mthi $0, $ac3 \n\t" 10892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n1], %[tp2] \n\t" 10902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbl %[n2], %[tp2] \n\t" 10912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "preceu.ph.qbr %[n3], %[tn1] \n\t" 10922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extr.w %[Temp3], $ac2, 7 \n\t" 10932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" 10942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" 10952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 10962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* odd 2. 
pixel */ 10972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 10982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mthi $0, $ac2 \n\t" 10992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extr.w %[Temp2], $ac3, 7 \n\t" 11002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" 11012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" 11022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extr.w %[Temp4], $ac2, 7 \n\t" 11032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 11052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tp1], %[Temp1](%[cm]) \n\t" 11062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tn1], %[Temp2](%[cm]) \n\t" 11072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[tp2], %[Temp3](%[cm]) \n\t" 11082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp1], 0(%[output_ptr]) \n\t" 11092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tn1], 1(%[output_ptr]) \n\t" 11102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbux %[n2], %[Temp4](%[cm]) \n\t" 11112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[tp2], 2(%[output_ptr]) \n\t" 11122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sb %[n2], 3(%[output_ptr]) \n\t" 11132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1), 11152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), 11162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3), 11172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 11182f01f9a5c363613e7389fb28c250edcd4509f815Dragan 
Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4) 11192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 11202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [cm] "r" (cm), 11212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [output_ptr] "r" (output_ptr), [src_ptr] "r" (src_ptr) 11222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 11232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11241b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += 4; 11252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 11262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* next row... */ 11281b362b15af34006e6a11974088a46d42b903418eJohann src_ptr += src_pixels_per_line; 11292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_width; 11302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 11312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 11322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 11332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_second_pass4 11362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 11371b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 11381b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT output_ptr, 11392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int output_pitch, 11402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int yoffset 11412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 11422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 11432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int i; 11442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
int Temp1, Temp2, Temp3, Temp4; 11462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector1b, vector2b, vector3b, vector4a; 11472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_l2; 11492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_l1; 11502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_0; 11512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r1; 11522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r2; 11532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r3; 11542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char *cm = ff_cropTbl + CROP_WIDTH; 11562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector4a = 64; 11582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* load filter coefficients */ 11602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = sub_pel_filterss[yoffset][0]; 11612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = sub_pel_filterss[yoffset][2]; 11622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector3b = sub_pel_filterss[yoffset][1]; 11632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11641b362b15af34006e6a11974088a46d42b903418eJohann if (vector1b) 11651b362b15af34006e6a11974088a46d42b903418eJohann { 11662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 6 tap filter */ 11672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = 2; i--;) 11692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 11701b362b15af34006e6a11974088a46d42b903418eJohann /* prefetch src_ptr data to cache memory */ 
11711b362b15af34006e6a11974088a46d42b903418eJohann prefetch_load(src_ptr); 11722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* do not allow compiler to reorder instructions */ 11742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 11752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ".set noreorder \n\t" 11762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 11772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 11782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 11792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 11812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 11822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -8(%[src_ptr]) \n\t" 11832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" 11842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" 11852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" 11862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" 11872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 12(%[src_ptr]) \n\t" 11882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 11892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 11912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 11922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 11932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" 
11942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 11952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 11962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 11972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -7(%[src_ptr]) \n\t" 11982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" 11992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" 12002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" 12012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" 12022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 13(%[src_ptr]) \n\t" 12032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 12042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 12052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 12072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 12082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 12092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" 12102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 12112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 12122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -6(%[src_ptr]) \n\t" 12142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" 
12152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" 12162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" 12172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" 12182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 14(%[src_ptr]) \n\t" 12192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 12202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 12212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 12232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 12242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 12252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" 12262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 12272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 12282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -5(%[src_ptr]) \n\t" 12302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" 12312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" 12322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" 12332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" 12342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 15(%[src_ptr]) \n\t" 12352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 
\n\t" 12362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac0, 9 \n\t" 12372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 12392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 12402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 12412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" 12422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 12432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 12442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac1, 9 \n\t" 12452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 12472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), 12482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 12492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), 12502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) 12512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 12522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), 12532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 12542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 12552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
/* clamp and store results */ 12572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[0] = cm[Temp1]; 12582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[1] = cm[Temp2]; 12592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[2] = cm[Temp3]; 12602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[3] = cm[Temp4]; 12612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_pitch; 12632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 12652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 12662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -4(%[src_ptr]) \n\t" 12672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" 12682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" 12692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" 12702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" 12712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 16(%[src_ptr]) \n\t" 12722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 12732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 12742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 12752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 12762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" 12772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 
12782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 12792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -3(%[src_ptr]) \n\t" 12812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" 12822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" 12832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" 12842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" 12852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 17(%[src_ptr]) \n\t" 12862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 12872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 12882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 12902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 12912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 12922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" 12932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 12942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 12952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 12962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -2(%[src_ptr]) \n\t" 12972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" 12982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" 
12992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" 13002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" 13012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 18(%[src_ptr]) \n\t" 13022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 13032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 13042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 13062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 13072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 13082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" 13092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 13102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 13112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -1(%[src_ptr]) \n\t" 13132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" 13142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" 13152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" 13162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" 13172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 19(%[src_ptr]) \n\t" 13182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 13192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac0, 9 \n\t" 
13202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 13222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 13232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 13242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" 13252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 13262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 13272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac1, 9 \n\t" 13282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 13302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), 13312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 13322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), 13332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) 13342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 13352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), 13362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 13372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 13382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 13402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
output_ptr[0] = cm[Temp1]; 13412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[1] = cm[Temp2]; 13422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[2] = cm[Temp3]; 13432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[3] = cm[Temp4]; 13442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr += 8; 13462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_pitch; 13472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 13482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 13491b362b15af34006e6a11974088a46d42b903418eJohann else 13501b362b15af34006e6a11974088a46d42b903418eJohann { 13512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 4 tap filter */ 13522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 13542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr); 13552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = 2; i--;) 13572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 13582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* do not allow compiler to reorder instructions */ 13592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 13601b362b15af34006e6a11974088a46d42b903418eJohann ".set noreorder \n\t" 13612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 13622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 13632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 13642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 13662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 13672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 
-4(%[src_ptr]) \n\t" 13682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" 13692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" 13702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" 13712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 13722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 13732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 13742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 13752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 13762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" 13782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" 13792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" 13802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" 13812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 13822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 13832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 13852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 13862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 13872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 13882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
13892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" 13902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" 13912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" 13922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" 13932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 13942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 13952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 13972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 13982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 13992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 14002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" 14022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" 14032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" 14042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" 14052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 14062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac0, 9 \n\t" 14072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 14082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 14092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 
14102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 14112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac1, 9 \n\t" 14122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 14142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), 14152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 14162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) 14172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 14182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) 14192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 14202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 14222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[0] = cm[Temp1]; 14232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[1] = cm[Temp2]; 14242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[2] = cm[Temp3]; 14252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[3] = cm[Temp4]; 14262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_pitch; 14282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 14302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 14312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" 
14322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" 14332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" 14342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" 14352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 14362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 14372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 14382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 14392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 14402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" 14422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" 14432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" 14442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" 14452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 14462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 14472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 14492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 14502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 14512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 14522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
14532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" 14542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" 14552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" 14562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" 14572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 14582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 14592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 14612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 14622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 14632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 14642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" 14662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" 14672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" 14682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" 14692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 14702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac0, 9 \n\t" 14712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 14722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 14732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 
14742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 14752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac1, 9 \n\t" 14762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 14782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4), 14792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 14802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) 14812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 14822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) 14832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 14842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 14862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[0] = cm[Temp1]; 14872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[1] = cm[Temp2]; 14882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[2] = cm[Temp3]; 14892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[3] = cm[Temp4]; 14902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr += 8; 14922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_pitch; 14932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 14942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 14952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 14962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 14972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
14982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_second_pass_8 14992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 15001b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 15011b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT output_ptr, 15022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int output_pitch, 15032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int output_height, 15042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int output_width, 15052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int yoffset 15062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 15072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 15081b362b15af34006e6a11974088a46d42b903418eJohann unsigned int i; 15092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; 15112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector1b, vector2b, vector3b, vector4a; 15122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_l2; 15142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_l1; 15152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_0; 15162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r1; 15172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r2; 15182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r3; 15192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char *cm = ff_cropTbl + CROP_WIDTH; 15202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector4a = 64; 15222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
15232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = sub_pel_filterss[yoffset][0]; 15242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = sub_pel_filterss[yoffset][2]; 15252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector3b = sub_pel_filterss[yoffset][1]; 15262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15271b362b15af34006e6a11974088a46d42b903418eJohann if (vector1b) 15281b362b15af34006e6a11974088a46d42b903418eJohann { 15292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 6 tap filter */ 15302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 15322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr); 15332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 15352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 15362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 15372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 15382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -16(%[src_ptr]) \n\t" 15392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" 15402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" 15412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" 15422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" 15432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 24(%[src_ptr]) \n\t" 15442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 15452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], 
%[src_ptr_r1], 8 \n\t" 15472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 15482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 15492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" 15502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 15512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 15522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -15(%[src_ptr]) \n\t" 15542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" 15552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" 15562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" 15572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" 15582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 25(%[src_ptr]) \n\t" 15592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 15602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 15612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 15632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 15642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 15652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" 15662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 
15672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 15682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -14(%[src_ptr]) \n\t" 15702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" 15712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" 15722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" 15732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 18(%[src_ptr]) \n\t" 15742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 26(%[src_ptr]) \n\t" 15752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 15762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 15772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 15792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 15802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 15812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" 15822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 15832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 15842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -13(%[src_ptr]) \n\t" 15862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" 15872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" 
15882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" 15892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 19(%[src_ptr]) \n\t" 15902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 27(%[src_ptr]) \n\t" 15912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 15922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac0, 9 \n\t" 15932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 15942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 15952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 15962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 15972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" 15982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 15992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 16002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16011b362b15af34006e6a11974088a46d42b903418eJohann : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3), 16021b362b15af34006e6a11974088a46d42b903418eJohann [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 16031b362b15af34006e6a11974088a46d42b903418eJohann [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), 16041b362b15af34006e6a11974088a46d42b903418eJohann [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) 16051b362b15af34006e6a11974088a46d42b903418eJohann : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 16061b362b15af34006e6a11974088a46d42b903418eJohann [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), 16071b362b15af34006e6a11974088a46d42b903418eJohann [src_ptr] "r" (src_ptr) 
16082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 16092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 16112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 16122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -12(%[src_ptr]) \n\t" 16132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" 16142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" 16152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 12(%[src_ptr]) \n\t" 16162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 20(%[src_ptr]) \n\t" 16172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 28(%[src_ptr]) \n\t" 16182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 16192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 16212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 16222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 16232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" 16242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 16252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 16262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac1, 9 \n\t" 16272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -11(%[src_ptr]) \n\t" 16292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], 
-3(%[src_ptr]) \n\t" 16302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" 16312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 13(%[src_ptr]) \n\t" 16322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 21(%[src_ptr]) \n\t" 16332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 29(%[src_ptr]) \n\t" 16342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 16352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp5], $ac2, 9 \n\t" 16362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 16382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 16392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 16402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" 16412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 16422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 16432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -10(%[src_ptr]) \n\t" 16452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" 16462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" 16472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 14(%[src_ptr]) \n\t" 16482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 22(%[src_ptr]) \n\t" 16492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 30(%[src_ptr]) \n\t" 16502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
"mtlo %[vector4a], $ac0 \n\t" 16512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp6], $ac3, 9 \n\t" 16522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 16542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 16552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 16562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" 16572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 16582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 16592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -9(%[src_ptr]) \n\t" 16612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" 16622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" 16632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 15(%[src_ptr]) \n\t" 16642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 23(%[src_ptr]) \n\t" 16652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 31(%[src_ptr]) \n\t" 16662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 16672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp7], $ac0, 9 \n\t" 16682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 16702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 16712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 
8 \n\t" 16722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" 16732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 16742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 16752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp8], $ac1, 9 \n\t" 16762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp4] "=&r" (Temp4), [Temp5] "=&r" (Temp5), 16782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp6] "=&r" (Temp6), [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), 16792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 16802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), 16812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) 16822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 16832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), 16842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 16852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 16862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 16882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[0] = cm[Temp1]; 16892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[1] = cm[Temp2]; 16902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[2] = cm[Temp3]; 16912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[3] = cm[Temp4]; 16922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
output_ptr[4] = cm[Temp5]; 16932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[5] = cm[Temp6]; 16942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[6] = cm[Temp7]; 16952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[7] = cm[Temp8]; 16962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 16972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr += 8; 16982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_pitch; 16992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 17002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 17011b362b15af34006e6a11974088a46d42b903418eJohann else 17021b362b15af34006e6a11974088a46d42b903418eJohann { 17032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 4 tap filter */ 17042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 17062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr); 17072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = output_height; i--;) 17092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 17102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 17112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" 17122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" 17132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" 17142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" 17152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 17162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 17172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], 
%[src_ptr_r2], 8 \n\t" 17182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 17192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 17202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 17222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) 17232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 17242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) 17252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 17262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 17282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" 17292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" 17302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" 17312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" 17322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 17332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 17342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 17352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 17362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 17372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 
17382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=r" (Temp1), 17402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 17412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) 17422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 17432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) 17442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 17452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_l1 = src_ptr[-6]; 17472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_0 = src_ptr[2]; 17482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r1 = src_ptr[10]; 17492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r2 = src_ptr[18]; 17502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 17522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 17532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 17542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 17552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 17562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 17572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 17582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp2] "=r" (Temp2) 17602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 
[vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 17612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), 17622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), 17632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a) 17642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 17652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_l1 = src_ptr[-5]; 17672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_0 = src_ptr[3]; 17682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r1 = src_ptr[11]; 17692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r2 = src_ptr[19]; 17702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 17722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 17732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 17742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 17752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 17762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 17772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac0, 9 \n\t" 17782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp3] "=r" (Temp3) 17802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 17812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), 17822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
[src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), 17832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a) 17842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 17852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_l1 = src_ptr[-4]; 17872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_0 = src_ptr[4]; 17882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r1 = src_ptr[12]; 17892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r2 = src_ptr[20]; 17902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 17922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 17932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 17942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 17952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 17962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 17972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac1, 9 \n\t" 17982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 17992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp4] "=r" (Temp4) 18002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 18012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), 18022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), 18032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a) 18042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 
18052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_l1 = src_ptr[-3]; 18072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_0 = src_ptr[5]; 18082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r1 = src_ptr[13]; 18092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r2 = src_ptr[21]; 18102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 18122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 18132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 18142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 18152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 18162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 18172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp5], $ac2, 9 \n\t" 18182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp5] "=&r" (Temp5) 18202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 18212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), 18222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), 18232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a) 18242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 18252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_l1 = src_ptr[-2]; 18272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_0 = src_ptr[6]; 
18282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r1 = src_ptr[14]; 18292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r2 = src_ptr[22]; 18302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 18322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 18332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 18342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 18352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 18362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 18372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp6], $ac3, 9 \n\t" 18382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp6] "=r" (Temp6) 18402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 18412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), 18422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), 18432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a) 18442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 18452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_l1 = src_ptr[-1]; 18472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_0 = src_ptr[7]; 18482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r1 = src_ptr[15]; 18492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr_r2 = src_ptr[23]; 18502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
18512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 18522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 18532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 18542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 18552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 18562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 18572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp7], $ac0, 9 \n\t" 18582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp8], $ac1, 9 \n\t" 18592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8) 18612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 18622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0), 18632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2), 18642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a) 18652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 18662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 18682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[0] = cm[Temp1]; 18692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[1] = cm[Temp2]; 18702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[2] = cm[Temp3]; 18712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[3] = cm[Temp4]; 18722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[4] = cm[Temp5]; 
18732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[5] = cm[Temp6]; 18742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[6] = cm[Temp7]; 18752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[7] = cm[Temp8]; 18762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr += 8; 18782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_pitch; 18792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 18802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 18812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 18822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_second_pass161 18852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 18861b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 18871b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT output_ptr, 18882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int output_pitch, 18892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan const unsigned short *vp8_filter 18902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 18912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 18922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int i, j; 18932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; 18952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector4a; 18962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int vector1b, vector2b, vector3b; 18972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 18982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_l2; 
18992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_l1; 19002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_0; 19012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r1; 19022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r2; 19032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char src_ptr_r3; 19042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned char *cm = ff_cropTbl + CROP_WIDTH; 19052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector4a = 64; 19072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector1b = vp8_filter[0]; 19092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector2b = vp8_filter[2]; 19102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vector3b = vp8_filter[1]; 19112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19121b362b15af34006e6a11974088a46d42b903418eJohann if (vector1b == 0) 19131b362b15af34006e6a11974088a46d42b903418eJohann { 19142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 4 tap filter */ 19152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 19172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr + 16); 19182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = 16; i--;) 19202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 19212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* unrolling for loop */ 19222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (j = 0; j < 16; j += 8) 19232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 19242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 
19252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 19262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" 19272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" 19282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" 19292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" 19302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 19312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 19322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 19332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 19342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 19352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" 19372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" 19382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" 19392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" 19402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 19412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 19422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 19442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 19452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 
19462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 19472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" 19492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" 19502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" 19512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" 19522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 19532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac3, 9 \n\t" 19542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 19562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 19572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 19582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 19592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" 19612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" 19622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" 19632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" 19642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 19652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac1, 9 \n\t" 19662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], 
%[src_ptr_r1], 8 \n\t" 19682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 19692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 19702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 19712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" 19732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" 19742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" 19752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" 19762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 19772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac3, 9 \n\t" 19782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 19802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 19812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 19822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 19832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" 19852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" 19862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" 19872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" 19882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 
19892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp5], $ac2, 9 \n\t" 19902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 19922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 19932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 19942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 19952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 19962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" 19972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" 19982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" 19992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" 20002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 20012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp6], $ac3, 9 \n\t" 20022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 20042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 20052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 20062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 20072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" 20092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" 20102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu 
%[src_ptr_r1], 23(%[src_ptr]) \n\t" 20112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" 20122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 20132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp7], $ac1, 9 \n\t" 20142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 20162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 20172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 20182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 20192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp8], $ac3, 9 \n\t" 20202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 20222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), 20232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), 20242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), 20252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 20262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2) 20272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b), 20282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr) 20292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 20302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20312f01f9a5c363613e7389fb28c250edcd4509f815Dragan 
Mrdjan /* clamp and store results */ 20322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[j] = cm[Temp1]; 20331b362b15af34006e6a11974088a46d42b903418eJohann output_ptr[j + 1] = cm[Temp2]; 20341b362b15af34006e6a11974088a46d42b903418eJohann output_ptr[j + 2] = cm[Temp3]; 20351b362b15af34006e6a11974088a46d42b903418eJohann output_ptr[j + 3] = cm[Temp4]; 20361b362b15af34006e6a11974088a46d42b903418eJohann output_ptr[j + 4] = cm[Temp5]; 20371b362b15af34006e6a11974088a46d42b903418eJohann output_ptr[j + 5] = cm[Temp6]; 20381b362b15af34006e6a11974088a46d42b903418eJohann output_ptr[j + 6] = cm[Temp7]; 20391b362b15af34006e6a11974088a46d42b903418eJohann output_ptr[j + 7] = cm[Temp8]; 20402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr += 8; 20422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 20432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_pitch; 20452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 20462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 20471b362b15af34006e6a11974088a46d42b903418eJohann else 20481b362b15af34006e6a11974088a46d42b903418eJohann { 20492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* 6 tap filter */ 20502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 20522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load(src_ptr + 16); 20532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* unroll for loop */ 20552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (i = 16; i--;) 20562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 20572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */
20582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 20592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -32(%[src_ptr]) \n\t" 20602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" 20612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" 20622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" 20632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" 20642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 48(%[src_ptr]) \n\t" 20652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 20662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 20682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 20692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 20702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" 20712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 20722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 20732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -31(%[src_ptr]) \n\t" 20752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" 20762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" 20772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" 20782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 
33(%[src_ptr]) \n\t" 20792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 49(%[src_ptr]) \n\t" 20802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 20812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 20822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 20842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 20852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 20862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" 20872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 20882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 20892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -30(%[src_ptr]) \n\t" 20912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" 20922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" 20932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" 20942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" 20952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 50(%[src_ptr]) \n\t" 20962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 20972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac0, 9 \n\t" 20982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 20992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 
21002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 21012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 21022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" 21032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 21042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 21052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -29(%[src_ptr]) \n\t" 21072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" 21082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" 21092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" 21102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" 21112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 51(%[src_ptr]) \n\t" 21122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 21132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac1, 9 \n\t" 21142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 21162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 21172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 21182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" 21192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 21202f01f9a5c363613e7389fb28c250edcd4509f815Dragan 
Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 21212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -28(%[src_ptr]) \n\t" 21232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" 21242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" 21252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" 21262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" 21272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 52(%[src_ptr]) \n\t" 21282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 21292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac3, 9 \n\t" 21302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 21322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 21332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 21342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" 21352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 21362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 21372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -27(%[src_ptr]) \n\t" 21392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" 21402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" 21412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu 
%[src_ptr_r1], 21(%[src_ptr]) \n\t" 21422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" 21432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 53(%[src_ptr]) \n\t" 21442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 21452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp5], $ac2, 9 \n\t" 21462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 21482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 21492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 21502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" 21512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 21522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 21532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -26(%[src_ptr]) \n\t" 21552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" 21562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" 21572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" 21582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" 21592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 54(%[src_ptr]) \n\t" 21602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 21612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp6], $ac0, 9 \n\t" 21622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
21632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 21642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 21652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 21662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" 21672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 21682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 21692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -25(%[src_ptr]) \n\t" 21712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" 21722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" 21732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" 21742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" 21752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 55(%[src_ptr]) \n\t" 21762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 21772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp7], $ac1, 9 \n\t" 21782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 21802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 21812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 21822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" 21832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
"dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 21842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 21852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp8], $ac3, 9 \n\t" 21862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 21882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), 21892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), 21902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), 21912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 21922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), 21932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3) 21942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 21952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), 21962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 21972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 21982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 21992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* clamp and store results */ 22002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[0] = cm[Temp1]; 22012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[1] = cm[Temp2]; 22022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[2] = cm[Temp3]; 22032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[3] = cm[Temp4]; 22042f01f9a5c363613e7389fb28c250edcd4509f815Dragan 
Mrdjan output_ptr[4] = cm[Temp5]; 22052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[5] = cm[Temp6]; 22062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[6] = cm[Temp7]; 22072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[7] = cm[Temp8]; 22082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* apply filter with vectors pairs */ 22102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 22112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -24(%[src_ptr]) \n\t" 22122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" 22132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 8(%[src_ptr]) \n\t" 22142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 24(%[src_ptr]) \n\t" 22152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 40(%[src_ptr]) \n\t" 22162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 56(%[src_ptr]) \n\t" 22172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 22182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 22202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 22212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 22222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" 22232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 22242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 22252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
22262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -23(%[src_ptr]) \n\t" 22272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" 22282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 9(%[src_ptr]) \n\t" 22292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 25(%[src_ptr]) \n\t" 22302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 41(%[src_ptr]) \n\t" 22312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 57(%[src_ptr]) \n\t" 22322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 22332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp1], $ac2, 9 \n\t" 22342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 22362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 22372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 22382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" 22392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 22402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 22412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -22(%[src_ptr]) \n\t" 22432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" 22442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 10(%[src_ptr]) \n\t" 22452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 26(%[src_ptr]) \n\t" 22462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 
42(%[src_ptr]) \n\t" 22472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 58(%[src_ptr]) \n\t" 22482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 22492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp2], $ac0, 9 \n\t" 22502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 22522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 22532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 22542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" 22552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 22562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 22572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -21(%[src_ptr]) \n\t" 22592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" 22602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 11(%[src_ptr]) \n\t" 22612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 27(%[src_ptr]) \n\t" 22622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 43(%[src_ptr]) \n\t" 22632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 59(%[src_ptr]) \n\t" 22642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 22652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp3], $ac1, 9 \n\t" 22662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 
22682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 22692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 22702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" 22712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 22722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 22732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -20(%[src_ptr]) \n\t" 22752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" 22762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 12(%[src_ptr]) \n\t" 22772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 28(%[src_ptr]) \n\t" 22782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 44(%[src_ptr]) \n\t" 22792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 60(%[src_ptr]) \n\t" 22802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac2 \n\t" 22812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp4], $ac3, 9 \n\t" 22822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 22842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 22852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 22862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" 22872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 22882f01f9a5c363613e7389fb28c250edcd4509f815Dragan 
Mrdjan "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" 22892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -19(%[src_ptr]) \n\t" 22912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" 22922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 13(%[src_ptr]) \n\t" 22932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 29(%[src_ptr]) \n\t" 22942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 45(%[src_ptr]) \n\t" 22952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 61(%[src_ptr]) \n\t" 22962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac0 \n\t" 22972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp5], $ac2, 9 \n\t" 22982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 22992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 23002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 23012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 23022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" 23032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" 23042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" 23052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -18(%[src_ptr]) \n\t" 23072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" 23082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 14(%[src_ptr]) \n\t" 23092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu 
%[src_ptr_r1], 30(%[src_ptr]) \n\t" 23102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 46(%[src_ptr]) \n\t" 23112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 62(%[src_ptr]) \n\t" 23122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac1 \n\t" 23132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp6], $ac0, 9 \n\t" 23142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 23162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 23172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 23182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" 23192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" 23202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" 23212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l2], -17(%[src_ptr]) \n\t" 23232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" 23242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_0], 15(%[src_ptr]) \n\t" 23252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r1], 31(%[src_ptr]) \n\t" 23262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r2], 47(%[src_ptr]) \n\t" 23272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "lbu %[src_ptr_r3], 63(%[src_ptr]) \n\t" 23282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "mtlo %[vector4a], $ac3 \n\t" 23292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp7], $ac1, 9 \n\t" 23302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
23312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" 23322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" 23332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" 23342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" 23352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" 23362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" 23372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "extp %[Temp8], $ac3, 9 \n\t" 23382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), 23402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4), 23412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6), 23422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8), 23432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0), 23442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2), 23452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3) 23462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b), 23472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [vector3b] "r" (vector3b), [vector4a] "r" (vector4a), 23482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_ptr] "r" (src_ptr) 23492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 23502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 
23512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr += 16; 23522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[8] = cm[Temp1]; 23532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[9] = cm[Temp2]; 23542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[10] = cm[Temp3]; 23552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[11] = cm[Temp4]; 23562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[12] = cm[Temp5]; 23572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[13] = cm[Temp6]; 23582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[14] = cm[Temp7]; 23592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr[15] = cm[Temp8]; 23602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan output_ptr += output_pitch; 23622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 23632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 23642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 23652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23671b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_sixtap_predict4x4_dspr2 23682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 23691b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 23702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int src_pixels_per_line, 23712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int xoffset, 23722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int yoffset, 23731b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst_ptr, 23742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int dst_pitch 23752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 23762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 23771b362b15af34006e6a11974088a46d42b903418eJohann unsigned char FData[9 * 
4]; /* Temp data bufffer used in filtering */ 23782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int pos = 16; 23792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* bit positon for extract from acc */ 23812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 23822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "wrdsp %[pos], 1 \n\t" 23832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 23842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [pos] "r" (pos) 23852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 23862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 23871b362b15af34006e6a11974088a46d42b903418eJohann if (yoffset) 23881b362b15af34006e6a11974088a46d42b903418eJohann { 23892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* First filter 1-D horizontally... */ 23902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_first_pass_4(src_ptr - (2 * src_pixels_per_line), FData, 23912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_pixels_per_line, 9, xoffset, 4); 23922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* then filter verticaly... 
*/ 23932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_second_pass4(FData + 8, dst_ptr, dst_pitch, yoffset); 23942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 23952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan else 23962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ 23972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_first_pass_4(src_ptr, dst_ptr, src_pixels_per_line, 23982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4, xoffset, dst_pitch); 23992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 24002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24021b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_sixtap_predict8x8_dspr2 24032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 24041b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 24052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int src_pixels_per_line, 24062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int xoffset, 24072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int yoffset, 24081b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst_ptr, 24092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int dst_pitch 24102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 24112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 24122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24131b362b15af34006e6a11974088a46d42b903418eJohann unsigned char FData[13 * 8]; /* Temp data bufffer used in filtering */ 24141b362b15af34006e6a11974088a46d42b903418eJohann unsigned int pos, Temp1, Temp2; 24152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan pos = 16; 24172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24182f01f9a5c363613e7389fb28c250edcd4509f815Dragan 
Mrdjan /* bit positon for extract from acc */ 24192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 24202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "wrdsp %[pos], 1 \n\t" 24212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 24222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [pos] "r" (pos) 24232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 24242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24251b362b15af34006e6a11974088a46d42b903418eJohann if (yoffset) 24261b362b15af34006e6a11974088a46d42b903418eJohann { 24272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr = src_ptr - (2 * src_pixels_per_line); 24292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan if (xoffset) 24312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* filter 1-D horizontally... */ 24322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, 24332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 13, xoffset, 8); 24342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24351b362b15af34006e6a11974088a46d42b903418eJohann else 24361b362b15af34006e6a11974088a46d42b903418eJohann { 24372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 24381b362b15af34006e6a11974088a46d42b903418eJohann prefetch_load(src_ptr + 2 * src_pixels_per_line); 24392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 24412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 0(%[FData]) \n\t" 
24442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 4(%[FData]) \n\t" 24452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 24462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 8(%[FData]) \n\t" 24502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 12(%[FData]) \n\t" 24512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 24522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 16(%[FData]) \n\t" 24562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 20(%[FData]) \n\t" 24572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 24582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 24(%[FData]) \n\t" 24622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 28(%[FData]) \n\t" 24632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 24642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 
24662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 32(%[FData]) \n\t" 24682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 36(%[FData]) \n\t" 24692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 24702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 40(%[FData]) \n\t" 24742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 44(%[FData]) \n\t" 24752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 24762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 48(%[FData]) \n\t" 24802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 52(%[FData]) \n\t" 24812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 24822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 56(%[FData]) \n\t" 24862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 60(%[FData]) \n\t" 24872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], 
%[src_pixels_per_line] \n\t" 24882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 64(%[FData]) \n\t" 24922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 68(%[FData]) \n\t" 24932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 24942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 24952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 24962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 24972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 72(%[FData]) \n\t" 24982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 76(%[FData]) \n\t" 24992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 80(%[FData]) \n\t" 25042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 84(%[FData]) \n\t" 25052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 88(%[FData]) \n\t" 
25102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 92(%[FData]) \n\t" 25112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 96(%[FData]) \n\t" 25162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 100(%[FData]) \n\t" 25172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) 25192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [FData] "r" (FData), [src_ptr] "r" (src_ptr), 25202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_pixels_per_line] "r" (src_pixels_per_line) 25212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 25222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 25231b362b15af34006e6a11974088a46d42b903418eJohann 25242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* filter verticaly... 
*/ 25252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 8, 8, yoffset); 25262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 25272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ 25291b362b15af34006e6a11974088a46d42b903418eJohann else 25301b362b15af34006e6a11974088a46d42b903418eJohann { 25312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan if (xoffset) 25322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, 25332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 8, xoffset, dst_pitch); 25342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25351b362b15af34006e6a11974088a46d42b903418eJohann else 25361b362b15af34006e6a11974088a46d42b903418eJohann { 25372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* copy from src buffer to dst buffer */ 25382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 25392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 0(%[dst_ptr]) \n\t" 25422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 4(%[dst_ptr]) \n\t" 25432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 8(%[dst_ptr]) \n\t" 
25482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 12(%[dst_ptr]) \n\t" 25492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 16(%[dst_ptr]) \n\t" 25542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 20(%[dst_ptr]) \n\t" 25552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 24(%[dst_ptr]) \n\t" 25602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 28(%[dst_ptr]) \n\t" 25612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 32(%[dst_ptr]) \n\t" 25662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 36(%[dst_ptr]) \n\t" 25672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) 
\n\t" 25702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 40(%[dst_ptr]) \n\t" 25722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 44(%[dst_ptr]) \n\t" 25732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 48(%[dst_ptr]) \n\t" 25782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 52(%[dst_ptr]) \n\t" 25792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 25802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 25822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 25832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 56(%[dst_ptr]) \n\t" 25842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 60(%[dst_ptr]) \n\t" 25852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) 25872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), 25882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_pixels_per_line] "r" (src_pixels_per_line) 25892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 25902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 25912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 25922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 
25932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 25951b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_sixtap_predict8x4_dspr2 25962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 25971b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 25982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int src_pixels_per_line, 25992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int xoffset, 26002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int yoffset, 26011b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst_ptr, 26022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int dst_pitch 26032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 26042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 26051b362b15af34006e6a11974088a46d42b903418eJohann unsigned char FData[9 * 8]; /* Temp data bufffer used in filtering */ 26061b362b15af34006e6a11974088a46d42b903418eJohann unsigned int pos, Temp1, Temp2; 26072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan pos = 16; 26092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* bit positon for extract from acc */ 26112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 26122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "wrdsp %[pos], 1 \n\t" 26132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 26142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [pos] "r" (pos) 26152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 26162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26171b362b15af34006e6a11974088a46d42b903418eJohann if (yoffset) 26181b362b15af34006e6a11974088a46d42b903418eJohann { 26192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr = src_ptr - (2 
* src_pixels_per_line); 26212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan if (xoffset) 26232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* filter 1-D horizontally... */ 26242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, 26252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 9, xoffset, 8); 26262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26271b362b15af34006e6a11974088a46d42b903418eJohann else 26281b362b15af34006e6a11974088a46d42b903418eJohann { 26292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* prefetch src_ptr data to cache memory */ 26301b362b15af34006e6a11974088a46d42b903418eJohann prefetch_load(src_ptr + 2 * src_pixels_per_line); 26312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 26332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 0(%[FData]) \n\t" 26362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 4(%[FData]) \n\t" 26372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 26382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 8(%[FData]) \n\t" 26422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 12(%[FData]) \n\t" 26432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 
26442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 16(%[FData]) \n\t" 26482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 20(%[FData]) \n\t" 26492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 26502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 24(%[FData]) \n\t" 26542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 28(%[FData]) \n\t" 26552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 26562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 32(%[FData]) \n\t" 26602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 36(%[FData]) \n\t" 26612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 26622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 40(%[FData]) \n\t" 
26662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 44(%[FData]) \n\t" 26672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 26682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 48(%[FData]) \n\t" 26722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 52(%[FData]) \n\t" 26732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 26742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 56(%[FData]) \n\t" 26782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 60(%[FData]) \n\t" 26792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 26802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 26822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 26832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 64(%[FData]) \n\t" 26842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 68(%[FData]) \n\t" 26852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) 26872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [FData] "r" (FData), [src_ptr] "r" (src_ptr), 
26882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_pixels_per_line] "r" (src_pixels_per_line) 26892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 26902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 26911b362b15af34006e6a11974088a46d42b903418eJohann 26922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* filter verticaly... */ 26932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 4, 8, yoffset); 26942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 26952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 26962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ 26971b362b15af34006e6a11974088a46d42b903418eJohann else 26981b362b15af34006e6a11974088a46d42b903418eJohann { 26992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan if (xoffset) 27002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, 27012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 4, xoffset, dst_pitch); 27022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27031b362b15af34006e6a11974088a46d42b903418eJohann else 27041b362b15af34006e6a11974088a46d42b903418eJohann { 27052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* copy from src buffer to dst buffer */ 27062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 27072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 27082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 27092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 0(%[dst_ptr]) \n\t" 27102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 4(%[dst_ptr]) \n\t" 27112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 
27122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 27142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 27152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 8(%[dst_ptr]) \n\t" 27162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 12(%[dst_ptr]) \n\t" 27172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 27182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 27202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 27212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 16(%[dst_ptr]) \n\t" 27222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 20(%[dst_ptr]) \n\t" 27232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" 27242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp1], 0(%[src_ptr]) \n\t" 27262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[Temp2], 4(%[src_ptr]) \n\t" 27272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp1], 24(%[dst_ptr]) \n\t" 27282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[Temp2], 28(%[dst_ptr]) \n\t" 27292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2) 27312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr), 27322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [src_pixels_per_line] "r" (src_pixels_per_line) 27332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 
27342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 27352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 27362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 27372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27391b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_sixtap_predict16x16_dspr2 27402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan( 27411b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src_ptr, 27422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int src_pixels_per_line, 27432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int xoffset, 27442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int yoffset, 27451b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst_ptr, 27462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int dst_pitch 27472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan) 27482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 27492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan const unsigned short *VFilter; 27501b362b15af34006e6a11974088a46d42b903418eJohann unsigned char FData[21 * 16]; /* Temp data bufffer used in filtering */ 27512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int pos; 27522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan VFilter = sub_pel_filterss[yoffset]; 27542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan pos = 16; 27562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* bit positon for extract from acc */ 27582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 27592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "wrdsp %[pos], 1 \n\t" 27602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 
27612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [pos] "r" (pos) 27622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 27632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27641b362b15af34006e6a11974088a46d42b903418eJohann if (yoffset) 27651b362b15af34006e6a11974088a46d42b903418eJohann { 27662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src_ptr = src_ptr - (2 * src_pixels_per_line); 27682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 27691b362b15af34006e6a11974088a46d42b903418eJohann switch (xoffset) 27701b362b15af34006e6a11974088a46d42b903418eJohann { 27712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* filter 1-D horizontally... */ 27721b362b15af34006e6a11974088a46d42b903418eJohann case 2: 27731b362b15af34006e6a11974088a46d42b903418eJohann case 4: 27741b362b15af34006e6a11974088a46d42b903418eJohann case 6: 27751b362b15af34006e6a11974088a46d42b903418eJohann /* 6 tap filter */ 27761b362b15af34006e6a11974088a46d42b903418eJohann vp8_filter_block2d_first_pass16_6tap(src_ptr, FData, src_pixels_per_line, 27771b362b15af34006e6a11974088a46d42b903418eJohann 21, xoffset, 16); 27781b362b15af34006e6a11974088a46d42b903418eJohann break; 27791b362b15af34006e6a11974088a46d42b903418eJohann 27801b362b15af34006e6a11974088a46d42b903418eJohann case 0: 27811b362b15af34006e6a11974088a46d42b903418eJohann /* only copy buffer */ 27821b362b15af34006e6a11974088a46d42b903418eJohann vp8_filter_block2d_first_pass16_0(src_ptr, FData, src_pixels_per_line); 27831b362b15af34006e6a11974088a46d42b903418eJohann break; 27841b362b15af34006e6a11974088a46d42b903418eJohann 27851b362b15af34006e6a11974088a46d42b903418eJohann case 1: 27861b362b15af34006e6a11974088a46d42b903418eJohann case 3: 27871b362b15af34006e6a11974088a46d42b903418eJohann case 5: 27881b362b15af34006e6a11974088a46d42b903418eJohann case 7: 27891b362b15af34006e6a11974088a46d42b903418eJohann /* 4 tap filter */ 
27901b362b15af34006e6a11974088a46d42b903418eJohann vp8_filter_block2d_first_pass16_4tap(src_ptr, FData, src_pixels_per_line, 16, 27911b362b15af34006e6a11974088a46d42b903418eJohann 21, xoffset, yoffset, dst_ptr, dst_pitch); 27921b362b15af34006e6a11974088a46d42b903418eJohann break; 27932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 27941b362b15af34006e6a11974088a46d42b903418eJohann 27952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* filter verticaly... */ 27962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan vp8_filter_block2d_second_pass161(FData + 32, dst_ptr, dst_pitch, VFilter); 27972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 27981b362b15af34006e6a11974088a46d42b903418eJohann else 27991b362b15af34006e6a11974088a46d42b903418eJohann { 28002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */ 28011b362b15af34006e6a11974088a46d42b903418eJohann switch (xoffset) 28021b362b15af34006e6a11974088a46d42b903418eJohann { 28031b362b15af34006e6a11974088a46d42b903418eJohann case 2: 28041b362b15af34006e6a11974088a46d42b903418eJohann case 4: 28051b362b15af34006e6a11974088a46d42b903418eJohann case 6: 28061b362b15af34006e6a11974088a46d42b903418eJohann /* 6 tap filter */ 28071b362b15af34006e6a11974088a46d42b903418eJohann vp8_filter_block2d_first_pass16_6tap(src_ptr, dst_ptr, src_pixels_per_line, 28081b362b15af34006e6a11974088a46d42b903418eJohann 16, xoffset, dst_pitch); 28091b362b15af34006e6a11974088a46d42b903418eJohann break; 28101b362b15af34006e6a11974088a46d42b903418eJohann 28111b362b15af34006e6a11974088a46d42b903418eJohann case 1: 28121b362b15af34006e6a11974088a46d42b903418eJohann case 3: 28131b362b15af34006e6a11974088a46d42b903418eJohann case 5: 28141b362b15af34006e6a11974088a46d42b903418eJohann case 7: 28151b362b15af34006e6a11974088a46d42b903418eJohann /* 4 tap filter */ 28161b362b15af34006e6a11974088a46d42b903418eJohann vp8_filter_block2d_first_pass16_4tap(src_ptr, 
dst_ptr, src_pixels_per_line, 16, 28171b362b15af34006e6a11974088a46d42b903418eJohann 21, xoffset, yoffset, dst_ptr, dst_pitch); 28181b362b15af34006e6a11974088a46d42b903418eJohann break; 28192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 28202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 28212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 28221b362b15af34006e6a11974088a46d42b903418eJohann 28231b362b15af34006e6a11974088a46d42b903418eJohann#endif 2824