/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
121b362b15af34006e6a11974088a46d42b903418eJohann#include <stdlib.h>
13ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp8_rtcd.h"
141b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_ports/mem.h"
152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
161b362b15af34006e6a11974088a46d42b903418eJohann#if HAVE_DSPR2
/* Number of guard entries placed on each side of the 256 in-range entries
 * of the clamp table. */
#define CROP_WIDTH 256
/* Clamp lookup table: CROP_WIDTH low guard entries, 256 in-range entries,
 * then CROP_WIDTH high guard entries.  Filled by dsputil_static_init(). */
unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH];
192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
/*
 * Filter coefficients indexed by sub-pixel offset (0..7), stored as byte
 * pairs inside 16-bit entries.
 *
 * Compared against sub_pel_filters_int (taps t0..t5), each row holds
 *   { t0:t5, |t1|:|t4|, t2:t3 }   (high byte : low byte)
 * i.e. the negative taps are stored as magnitudes.
 */
static const unsigned short sub_pel_filterss[8][3] =
{
    {      0,      0,      0},
    {      0, 0x0601, 0x7b0c},
    { 0x0201, 0x0b08, 0x6c24},
    {      0, 0x0906, 0x5d32},
    { 0x0303, 0x1010, 0x4d4d},
    {      0, 0x0609, 0x325d},
    { 0x0102, 0x080b, 0x246c},
    {      0, 0x0106, 0x0c7b},
};
312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
321b362b15af34006e6a11974088a46d42b903418eJohann
/*
 * Six filter taps per sub-pixel offset (0..7), packed as signed 16-bit
 * halfword pairs in 32-bit words.
 *
 * With the taps named t0..t5, each row is
 *   { t0:t1, t2:t3, t4:t5 }   (high halfword : low halfword)
 * e.g. row 2 is { 2:-11, 108:36, -8:1 }.  Negative taps appear as
 * 0xffxx halfwords, so some initialisers exceed INT_MAX and rely on the
 * usual wrap-around conversion to int.
 */
static const int sub_pel_filters_int[8][3] =
{
    {          0,          0,          0},
    { 0x0000fffa, 0x007b000c, 0xffff0000},
    { 0x0002fff5, 0x006c0024, 0xfff80001},
    { 0x0000fff7, 0x005d0032, 0xfffa0000},
    { 0x0003fff0, 0x004d004d, 0xfff00003},
    { 0x0000fffa, 0x0032005d, 0xfff70000},
    { 0x0001fff8, 0x0024006c, 0xfff50002},
    { 0x0000ffff, 0x000c007b, 0xfffa0000},
};
442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
451b362b15af34006e6a11974088a46d42b903418eJohann
/*
 * Same six taps as sub_pel_filters_int, with the two halfwords of every
 * word swapped.
 *
 * With the taps named t0..t5, each row is
 *   { t1:t0, t3:t2, t5:t4 }   (high halfword : low halfword)
 * Column 2 doubles as the filter-type selector in
 * vp8_filter_block2d_first_pass_4(): it is 0 for offset 0, at most 0xffff
 * for the odd offsets (t5 == 0) and above 0x10000 for offsets 2, 4 and 6.
 */
static const int sub_pel_filters_inv[8][3] =
{
    {          0,          0,          0},
    { 0xfffa0000, 0x000c007b, 0x0000ffff},
    { 0xfff50002, 0x0024006c, 0x0001fff8},
    { 0xfff70000, 0x0032005d, 0x0000fffa},
    { 0xfff00003, 0x004d004d, 0x0003fff0},
    { 0xfffa0000, 0x005d0032, 0x0000fff7},
    { 0xfff80001, 0x006c0024, 0x0002fff5},
    { 0xffff0000, 0x007b000c, 0x0000fffa},
};
572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
581b362b15af34006e6a11974088a46d42b903418eJohann
/*
 * Four-tap form of the filters for the odd sub-pixel offsets, whose outer
 * taps t0 and t5 are zero in sub_pel_filters_int.
 *
 * Each populated row is { t1:t2, t3:t4 } (high halfword : low halfword),
 * e.g. row 1 is { -6:123, 12:-1 }.  Even rows are all zero.
 */
static const int sub_pel_filters_int_tap_4[8][2] =
{
    {          0,          0},
    { 0xfffa007b, 0x000cffff},
    {          0,          0},
    { 0xfff7005d, 0x0032fffa},
    {          0,          0},
    { 0xfffa0032, 0x005dfff7},
    {          0,          0},
    { 0xffff000c, 0x007bfffa},
};
702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
711b362b15af34006e6a11974088a46d42b903418eJohann
/*
 * Same four taps as sub_pel_filters_int_tap_4 with the halfwords of every
 * word swapped: each populated row is { t2:t1, t4:t3 }
 * (high halfword : low halfword).  Even rows are all zero.
 *
 * Used by the 4-tap path of vp8_filter_block2d_first_pass_4().
 */
static const int sub_pel_filters_inv_tap_4[8][2] =
{
    {          0,          0},
    { 0x007bfffa, 0xffff000c},
    {          0,          0},
    { 0x005dfff7, 0xfffa0032},
    {          0,          0},
    { 0x0032fffa, 0xfff7005d},
    {          0,          0},
    { 0x000cffff, 0xfffa007b},
};
832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
/*
 * Issue a MIPS "pref" instruction with hint 0 on the address in src.
 * Nothing is read into a C object and nothing is returned.
 *
 * NOTE(review): declared `inline` without `static` or `extern`.  Under
 * C99-and-later inline rules this translation unit then provides no
 * external definition, which can fail to link when a call is not inlined.
 * Linkage is left as-is here because other files may reference the symbol
 * -- confirm and consider `static inline`.
 */
inline void prefetch_load(unsigned char *src)
{
    __asm__ __volatile__ (
        "pref   0,  0(%[src])   \n\t"
        :
        : [src] "r" (src)
    );
}
922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
/*
 * Issue a MIPS "pref" instruction with hint 1 on the address in dst.
 * Nothing is written through dst and nothing is returned.
 *
 * NOTE(review): declared `inline` without `static` or `extern`.  Under
 * C99-and-later inline rules this translation unit then provides no
 * external definition, which can fail to link when a call is not inlined.
 * Linkage is left as-is here because other files may reference the symbol
 * -- confirm and consider `static inline`.
 */
inline void prefetch_store(unsigned char *dst)
{
    __asm__ __volatile__ (
        "pref   1,  0(%[dst])   \n\t"
        :
        : [dst] "r" (dst)
    );
}
1022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid dsputil_static_init(void)
1042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
1052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int i;
1062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1071b362b15af34006e6a11974088a46d42b903418eJohann    for (i = 0; i < 256; i++) ff_cropTbl[i + CROP_WIDTH] = i;
1081b362b15af34006e6a11974088a46d42b903418eJohann
1091b362b15af34006e6a11974088a46d42b903418eJohann    for (i = 0; i < CROP_WIDTH; i++)
1101b362b15af34006e6a11974088a46d42b903418eJohann    {
1112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        ff_cropTbl[i] = 0;
1122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        ff_cropTbl[i + CROP_WIDTH + 256] = 255;
1132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
1142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
1152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass_4
1172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
1181b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
1191b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT dst_ptr,
1202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int src_pixels_per_line,
1212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int output_height,
1222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int xoffset,
1232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int pitch
1242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
1252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
1262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int i;
1272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int Temp1, Temp2, Temp3, Temp4;
1282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector4a = 64;
1302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int vector1b, vector2b, vector3b;
1312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int tp1, tp2, tn1, tn2;
1322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int p1, p2, p3;
1332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int n1, n2, n3;
1342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char *cm = ff_cropTbl + CROP_WIDTH;
1352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector3b = sub_pel_filters_inv[xoffset][2];
1372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1381b362b15af34006e6a11974088a46d42b903418eJohann    /* if (xoffset == 0) we don't need any filtering */
1391b362b15af34006e6a11974088a46d42b903418eJohann    if (vector3b == 0)
1401b362b15af34006e6a11974088a46d42b903418eJohann    {
1412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = 0; i < output_height; i++)
1422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
1432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* prefetch src_ptr data to cache memory */
1442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            prefetch_load(src_ptr + src_pixels_per_line);
1452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[0] = src_ptr[0];
1462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[1] = src_ptr[1];
1472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[2] = src_ptr[2];
1482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[3] = src_ptr[3];
1492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* next row... */
1511b362b15af34006e6a11974088a46d42b903418eJohann            src_ptr += src_pixels_per_line;
1522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr += 4;
1532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
1542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
1551b362b15af34006e6a11974088a46d42b903418eJohann    else
1561b362b15af34006e6a11974088a46d42b903418eJohann    {
1571b362b15af34006e6a11974088a46d42b903418eJohann        if (vector3b > 65536)
1581b362b15af34006e6a11974088a46d42b903418eJohann        {
1592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* 6 tap filter */
1602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vector1b = sub_pel_filters_inv[xoffset][0];
1622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vector2b = sub_pel_filters_inv[xoffset][1];
1632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* prefetch src_ptr data to cache memory */
1652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            prefetch_load(src_ptr + src_pixels_per_line);
1662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            for (i = output_height; i--;)
1682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            {
1692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* apply filter with vectors pairs */
1702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
1712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp1],      -2(%[src_ptr])                 \n\t"
1722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp2],      2(%[src_ptr])                  \n\t"
1732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 1. pixel */
1752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
1762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p1],       %[tp1]                         \n\t"
1772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p2],       %[tp1]                         \n\t"
1782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p3],       %[tp2]                         \n\t"
1792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p1],          %[vector1b]    \n\t"
1802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p2],          %[vector2b]    \n\t"
1812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p3],          %[vector3b]    \n\t"
1822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 2. pixel */
1842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
1852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p1],       %[tp2]                         \n\t"
1862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "balign           %[tp2],      %[tp1],         3              \n\t"
1872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp1],    $ac3,           9              \n\t"
1882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p2],          %[vector1b]    \n\t"
1892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p3],          %[vector2b]    \n\t"
1902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p1],          %[vector3b]    \n\t"
1912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
1922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 1. pixel */
1932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tn2],      3(%[src_ptr])                  \n\t"
1942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
1952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n1],       %[tp2]                         \n\t"
1962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n2],       %[tp2]                         \n\t"
1972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n3],       %[tn2]                         \n\t"
1982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp3],    $ac2,           9              \n\t"
1992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n1],          %[vector1b]    \n\t"
2002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n2],          %[vector2b]    \n\t"
2012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n3],          %[vector3b]    \n\t"
2022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 2. pixel */
2042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
2052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n1],       %[tn2]                         \n\t"
2062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp2],    $ac3,           9              \n\t"
2072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n2],          %[vector1b]    \n\t"
2082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n3],          %[vector2b]    \n\t"
2092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n1],          %[vector3b]    \n\t"
2102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp4],    $ac2,           9              \n\t"
2112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* clamp */
2132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tp1],      %[Temp1](%[cm])                \n\t"
2142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tn1],      %[Temp2](%[cm])                \n\t"
2152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tp2],      %[Temp3](%[cm])                \n\t"
2162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[n2],       %[Temp4](%[cm])                \n\t"
2172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* store bytes */
2192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tp1],      0(%[dst_ptr])                  \n\t"
2202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tn1],      1(%[dst_ptr])                  \n\t"
2212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tp2],      2(%[dst_ptr])                  \n\t"
2222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[n2],       3(%[dst_ptr])                  \n\t"
2232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1),
2252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2),
2262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [p3] "=&r" (p3), [n1] "=&r" (n1), [n2] "=&r" (n2),
2272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [n3] "=&r" (n3), [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
2282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
2292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
2302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr),
2312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector3b] "r" (vector3b), [src_ptr] "r" (src_ptr)
2322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
2332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* Next row... */
2351b362b15af34006e6a11974088a46d42b903418eJohann                src_ptr += src_pixels_per_line;
2362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr += pitch;
2372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            }
2382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
2391b362b15af34006e6a11974088a46d42b903418eJohann        else
2401b362b15af34006e6a11974088a46d42b903418eJohann        {
2412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* 4 tap filter */
2422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vector1b = sub_pel_filters_inv_tap_4[xoffset][0];
2442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vector2b = sub_pel_filters_inv_tap_4[xoffset][1];
2452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            for (i = output_height; i--;)
2472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            {
2482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* apply filter with vectors pairs */
2492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
2502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp1],      -1(%[src_ptr])                 \n\t"
2512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp2],      3(%[src_ptr])                  \n\t"
2522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 1. pixel */
2542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
2552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p1],       %[tp1]                         \n\t"
2562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p2],       %[tp1]                         \n\t"
2572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p3],       %[tp2]                         \n\t"
2582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p1],          %[vector1b]    \n\t"
2592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p2],          %[vector2b]    \n\t"
2602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 2. pixel */
2622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
2632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p2],          %[vector1b]    \n\t"
2642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p3],          %[vector2b]    \n\t"
2652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp1],    $ac3,           9              \n\t"
2662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 1. pixel */
2682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "srl              %[tn1],      %[tp2],         8              \n\t"
2692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "balign           %[tp2],      %[tp1],         3              \n\t"
2702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
2712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n1],       %[tp2]                         \n\t"
2722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n2],       %[tp2]                         \n\t"
2732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n3],       %[tn1]                         \n\t"
2742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp3],    $ac2,           9              \n\t"
2752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n1],          %[vector1b]    \n\t"
2762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n2],          %[vector2b]    \n\t"
2772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 2. pixel */
2792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
2802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp2],    $ac3,           9              \n\t"
2812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n2],          %[vector1b]    \n\t"
2822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n3],          %[vector2b]    \n\t"
2832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp4],    $ac2,           9              \n\t"
2842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* clamp and store results */
2862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tp1],      %[Temp1](%[cm])                \n\t"
2872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tn1],      %[Temp2](%[cm])                \n\t"
2882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tp2],      %[Temp3](%[cm])                \n\t"
2892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tp1],      0(%[dst_ptr])                  \n\t"
2902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tn1],      1(%[dst_ptr])                  \n\t"
2912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[n2],       %[Temp4](%[cm])                \n\t"
2922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tp2],      2(%[dst_ptr])                  \n\t"
2932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[n2],       3(%[dst_ptr])                  \n\t"
2942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
2952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1),
2962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
2972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
2982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
2992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
3002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
3012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr),
3022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [src_ptr] "r" (src_ptr)
3032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
3042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /*  Next row... */
3051b362b15af34006e6a11974088a46d42b903418eJohann                src_ptr += src_pixels_per_line;
3062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr += pitch;
3072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            }
3082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
3092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
3102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
3112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
/*
 * Horizontal (first) filter pass that produces 8 output pixels per row.
 *
 * For each of output_height rows the function writes dst_ptr[0..7] and then
 * advances src_ptr by src_pixels_per_line.  Three paths exist:
 *
 *   xoffset == 0 : the 8 source bytes are copied unchanged and dst_ptr is
 *                  advanced by a fixed 8 bytes per row (pitch is not used).
 *   6-tap path   : taken when sub_pel_filters_inv[xoffset][2] > 65536; uses
 *                  the three coefficient-pair words of
 *                  sub_pel_filters_inv[xoffset].
 *   4-tap path   : taken otherwise; uses the two coefficient-pair words of
 *                  sub_pel_filters_inv_tap_4[xoffset].
 *
 * In both filtered paths each result is looked up through
 * ff_cropTbl + CROP_WIDTH before it is stored, and dst_ptr is advanced by
 * pitch per row.
 *
 * src_ptr             - first source pixel of the first row.  The 6-tap path
 *                       issues unaligned word loads at byte offsets
 *                       -2, 2, 3, 6 and 7 from it, the 4-tap path at
 *                       -1, 3, 4, 7 and 8, so bytes on both sides of the
 *                       8-pixel span must be readable.
 * dst_ptr             - destination; 8 bytes are written per row.
 * src_pixels_per_line - added to src_ptr after every row.
 * output_height       - number of rows to process.
 * xoffset             - row index into the filter tables; 0 means plain copy.
 *                       No range check is done here (presumably 0..7 -- TODO
 *                       confirm against the table declarations).
 * pitch               - added to dst_ptr after every row in the filtered
 *                       paths only.
 *
 * NOTE(review): the extp instructions take their bit position from the DSP
 * control register, which this function never writes -- presumably the caller
 * programs it (wrdsp) beforehand; confirm.
 * NOTE(review): the asm statements modify $ac2/$ac3 without declaring them as
 * clobbers.
 */
3122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass_8_all
3132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
3141b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
3151b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT dst_ptr,
3162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int src_pixels_per_line,
3172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int output_height,
3182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int xoffset,
3192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int pitch
3202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
3212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
3222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int i;
3232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int Temp1, Temp2, Temp3, Temp4;
3242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
    /* value written to the accumulator (mtlo) before every dot product;
     * presumably the rounding term of the filter -- TODO confirm */
3251b362b15af34006e6a11974088a46d42b903418eJohann    unsigned int vector4a = 64;
3262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector1b, vector2b, vector3b;
3272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int tp1, tp2, tn1, tn2;
3282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int p1, p2, p3, p4;
3292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int n1, n2, n3, n4;
3302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
    /* lookup table base, offset by CROP_WIDTH so that indices below zero
     * still land inside ff_cropTbl */
3312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char *cm = ff_cropTbl + CROP_WIDTH;
3322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3331b362b15af34006e6a11974088a46d42b903418eJohann    /* if (xoffset == 0) we don't need any filtering */
3341b362b15af34006e6a11974088a46d42b903418eJohann    if (xoffset == 0)
3351b362b15af34006e6a11974088a46d42b903418eJohann    {
3362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = 0; i < output_height; i++)
3372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
3382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* prefetch src_ptr data to cache memory */
3392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            prefetch_load(src_ptr + src_pixels_per_line);
3402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[0] = src_ptr[0];
3422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[1] = src_ptr[1];
3432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[2] = src_ptr[2];
3442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[3] = src_ptr[3];
3452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[4] = src_ptr[4];
3462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[5] = src_ptr[5];
3472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[6] = src_ptr[6];
3482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr[7] = src_ptr[7];
3492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* next row... */
3511b362b15af34006e6a11974088a46d42b903418eJohann            src_ptr += src_pixels_per_line;
            /* NOTE(review): this path steps the destination by a literal 8,
             * whereas both filtered paths below step by pitch -- confirm
             * that callers never reach here with pitch != 8 */
3522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr += 8;
3532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
3542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
3551b362b15af34006e6a11974088a46d42b903418eJohann    else
3561b362b15af34006e6a11974088a46d42b903418eJohann    {
3572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        vector3b = sub_pel_filters_inv[xoffset][2];
3582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
        /* values above 65536 select the 6-tap path; presumably this detects
         * a populated upper halfword in the third coefficient pair -- confirm
         * against the sub_pel_filters_inv table */
3591b362b15af34006e6a11974088a46d42b903418eJohann        if (vector3b > 65536)
3601b362b15af34006e6a11974088a46d42b903418eJohann        {
3612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* 6 tap filter */
3622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vector1b = sub_pel_filters_inv[xoffset][0];
3642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vector2b = sub_pel_filters_inv[xoffset][1];
3652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            for (i = output_height; i--;)
3672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            {
3682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* prefetch src_ptr data to cache memory */
3692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                prefetch_load(src_ptr + src_pixels_per_line);
3702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* apply filter with vectors pairs */
                /* first four outputs: the "even" results are accumulated
                 * from the words loaded at offsets -2 and 2, the "odd"
                 * results from the same data taken one byte further on
                 * (balign / load at offset 3); $ac3 and $ac2 are used
                 * alternately so one can be extracted while the other
                 * accumulates */
3722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
3732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp1],      -2(%[src_ptr])                 \n\t"
3742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp2],      2(%[src_ptr])                  \n\t"
3752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 1. pixel */
3772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
3782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p1],       %[tp1]                         \n\t"
3792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p2],       %[tp1]                         \n\t"
3802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p3],       %[tp2]                         \n\t"
3812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p1],          %[vector1b]    \n\t"
3822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p2],          %[vector2b]    \n\t"
3832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p3],          %[vector3b]    \n\t"
3842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 2. pixel */
3862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
3872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p1],       %[tp2]                         \n\t"
3882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p2],          %[vector1b]    \n\t"
3892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p3],          %[vector2b]    \n\t"
3902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p1],          %[vector3b]    \n\t"
3912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "balign           %[tp2],      %[tp1],         3              \n\t"
3932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp1],    $ac3,           9              \n\t"
3942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tn2],      3(%[src_ptr])                  \n\t"
3952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
3962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 1. pixel */
3972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
3982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n1],       %[tp2]                         \n\t"
3992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n2],       %[tp2]                         \n\t"
4002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n3],       %[tn2]                         \n\t"
4012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp3],    $ac2,           9              \n\t"
4022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n1],          %[vector1b]    \n\t"
4032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n2],          %[vector2b]    \n\t"
4042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n3],          %[vector3b]    \n\t"
4052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 2. pixel */
4072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
4082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n1],       %[tn2]                         \n\t"
4092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n2],          %[vector1b]    \n\t"
4102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n3],          %[vector2b]    \n\t"
4112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n1],          %[vector3b]    \n\t"
4122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp1],      6(%[src_ptr])                  \n\t"
4132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp2],    $ac3,           9              \n\t"
                    /* $ac3 is re-seeded and p2 reloaded here on behalf of
                     * the next asm statement ("even 3. pixel") */
4142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
4152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p2],       %[tp1]                         \n\t"
4162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp4],    $ac2,           9              \n\t"
4172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2),
4192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
4202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
4212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
4222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
4232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
4242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [vector3b] "r" (vector3b),
4252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [src_ptr] "r" (src_ptr)
4262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
4272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* clamp and store results */
                /* Temp1/Temp3 are the two even results and Temp2/Temp4 the
                 * two odd ones, hence the interleaved store order */
4292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[0] = cm[Temp1];
4302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[1] = cm[Temp2];
4312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[2] = cm[Temp3];
4322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[3] = cm[Temp4];
4332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* next 4 pixels */
                /* this statement starts accumulating into $ac3 without
                 * seeding it: it relies on the mtlo issued at the end of the
                 * previous asm statement, and on tp1, p1, p2, p3, n1 and n3
                 * computed there */
4352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
4362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 3. pixel */
4372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p3],          %[vector1b]    \n\t"
4382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p1],          %[vector2b]    \n\t"
4392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p2],          %[vector3b]    \n\t"
4402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 4. pixel */
4422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
4432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p4],       %[tp1]                         \n\t"
4442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p1],          %[vector1b]    \n\t"
4452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p2],          %[vector2b]    \n\t"
4462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p4],          %[vector3b]    \n\t"
4472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tn1],      7(%[src_ptr])                  \n\t"
4492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp1],    $ac3,           9              \n\t"
4502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 3. pixel */
4522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
4532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n2],       %[tn1]                         \n\t"
4542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n3],          %[vector1b]    \n\t"
4552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n1],          %[vector2b]    \n\t"
4562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n2],          %[vector3b]    \n\t"
4572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp3],    $ac2,           9              \n\t"
4582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 4. pixel */
4602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
4612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n4],       %[tn1]                         \n\t"
4622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n1],          %[vector1b]    \n\t"
4632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n2],          %[vector2b]    \n\t"
4642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n4],          %[vector3b]    \n\t"
4652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp2],    $ac3,           9              \n\t"
4662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp4],    $ac2,           9              \n\t"
4672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tn1] "=&r" (tn1), [n2] "=&r" (n2),
4692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [p4] "=&r" (p4), [n4] "=&r" (n4),
4702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
4712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
4722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tp1] "r" (tp1), [vector1b] "r" (vector1b), [p2] "r" (p2),
4732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector2b] "r" (vector2b), [n1] "r" (n1), [p1] "r" (p1),
4742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [vector3b] "r" (vector3b),
4752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr)
4762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
4772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* clamp and store results */
4792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[4] = cm[Temp1];
4802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[5] = cm[Temp2];
4812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[6] = cm[Temp3];
4822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[7] = cm[Temp4];
4832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
                /* next row: source and destination use independent strides */
4841b362b15af34006e6a11974088a46d42b903418eJohann                src_ptr += src_pixels_per_line;
4852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr += pitch;
4862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            }
4872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
4881b362b15af34006e6a11974088a46d42b903418eJohann        else
4891b362b15af34006e6a11974088a46d42b903418eJohann        {
4902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* 4 tap filter */
4912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
            /* only two coefficient-pair words are used on this path; they
             * come from the separate sub_pel_filters_inv_tap_4 table */
4922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vector1b = sub_pel_filters_inv_tap_4[xoffset][0];
4932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vector2b = sub_pel_filters_inv_tap_4[xoffset][1];
4942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
4952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            for (i = output_height; i--;)
4962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            {
4972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* prefetch src_ptr data to cache memory */
4982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                prefetch_load(src_ptr + src_pixels_per_line);
4992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* apply filter with vectors pairs */
5012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
5022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp1],      -1(%[src_ptr])                 \n\t"
5032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 1. pixel */
5052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
5062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p1],       %[tp1]                         \n\t"
5072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p2],       %[tp1]                         \n\t"
5082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p1],          %[vector1b]    \n\t"
5092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p2],          %[vector2b]    \n\t"
5102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp2],      3(%[src_ptr])                  \n\t"
5122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 2. pixel  */
5142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
5152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p3],       %[tp2]                         \n\t"
5162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p4],       %[tp2]                         \n\t"
5172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p2],          %[vector1b]    \n\t"
5182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p3],          %[vector2b]    \n\t"
5192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp1],    $ac3,           9              \n\t"
5202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "balign           %[tp2],      %[tp1],         3              \n\t"
5222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 1. pixel */
5242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
5252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n1],       %[tp2]                         \n\t"
5262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n2],       %[tp2]                         \n\t"
5272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n1],          %[vector1b]    \n\t"
5282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n2],          %[vector2b]    \n\t"
5292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp3],    $ac2,           9              \n\t"
5302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tn2],      4(%[src_ptr])                  \n\t"
5322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 2. pixel */
5342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
5352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n3],       %[tn2]                         \n\t"
5362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n4],       %[tn2]                         \n\t"
5372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n2],          %[vector1b]    \n\t"
5382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n3],          %[vector2b]    \n\t"
5392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp1],      7(%[src_ptr])                  \n\t"
5402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp2],    $ac3,           9              \n\t"
                    /* $ac3 is re-seeded here on behalf of the next asm
                     * statement ("even 3. pixel") */
5412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
5422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp4],    $ac2,           9              \n\t"
5432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2),
5452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [tn2] "=&r" (tn2), [p1] "=&r" (p1), [p2] "=&r" (p2),
5462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [p3] "=&r" (p3), [p4] "=&r" (p4), [n1] "=&r" (n1),
5472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [n2] "=&r" (n2), [n3] "=&r" (n3), [n4] "=&r" (n4),
5482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
5492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
5502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
5512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
5522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
5532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* clamp and store results */
                /* Temp1/Temp3 are the two even results and Temp2/Temp4 the
                 * two odd ones, hence the interleaved store order */
5552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[0] = cm[Temp1];
5562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[1] = cm[Temp2];
5572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[2] = cm[Temp3];
5582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[3] = cm[Temp4];
5592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* next 4 pixels */
                /* this statement starts accumulating into $ac3 without
                 * seeding it: it relies on the mtlo issued at the end of the
                 * previous asm statement, and on tp1, p3, p4, n3 and n4
                 * computed there */
5612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
5622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 3. pixel */
5632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p3],          %[vector1b]    \n\t"
5642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p4],          %[vector2b]    \n\t"
5652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 4. pixel */
5672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
5682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p2],       %[tp1]                         \n\t"
5692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p4],          %[vector1b]    \n\t"
5702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p2],          %[vector2b]    \n\t"
5712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp1],    $ac3,           9              \n\t"
5722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 3. pixel */
5742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                           \n\t"
5752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n3],          %[vector1b]    \n\t"
5762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n4],          %[vector2b]    \n\t"
5772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tn1],      8(%[src_ptr])                  \n\t"
5782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp3],    $ac2,           9              \n\t"
5792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 4. pixel */
5812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                           \n\t"
5822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n2],       %[tn1]                         \n\t"
5832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n4],          %[vector1b]    \n\t"
5842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n2],          %[vector2b]    \n\t"
5852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp2],    $ac3,           9              \n\t"
5862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp             %[Temp4],    $ac2,           9              \n\t"
5872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2),
5892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
5902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
5912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tp1] "r" (tp1), [p3] "r" (p3), [p4] "r" (p4),
5922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
5932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr),
5942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [n3] "r" (n3), [n4] "r" (n4)
5952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
5962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
5972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* clamp and store results */
5982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[4] = cm[Temp1];
5992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[5] = cm[Temp2];
6002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[6] = cm[Temp3];
6012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr[7] = cm[Temp4];
6022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* next row... */
6041b362b15af34006e6a11974088a46d42b903418eJohann                src_ptr += src_pixels_per_line;
6052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                dst_ptr += pitch;
6062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            }
6072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
6082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
6092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
6102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass16_6tap
6132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
6141b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
6151b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT dst_ptr,
6162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int src_pixels_per_line,
6172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int output_height,
6182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int xoffset,
6192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int pitch
6202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
6212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
6222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int i;
6232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int Temp1, Temp2, Temp3, Temp4;
6242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector4a;
6262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector1b, vector2b, vector3b;
6272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int tp1, tp2, tn1, tn2;
6282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int p1, p2, p3, p4;
6292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int n1, n2, n3, n4;
6302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char *cm = ff_cropTbl + CROP_WIDTH;
6312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector1b = sub_pel_filters_inv[xoffset][0];
6332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector2b = sub_pel_filters_inv[xoffset][1];
6342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector3b = sub_pel_filters_inv[xoffset][2];
6352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector4a = 64;
6362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    for (i = output_height; i--;)
6382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    {
6392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* prefetch src_ptr data to cache memory */
6402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        prefetch_load(src_ptr + src_pixels_per_line);
6412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* apply filter with vectors pairs */
6432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        __asm__ __volatile__ (
6442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw                %[tp1],      -2(%[src_ptr])                 \n\t"
6452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw                %[tp2],      2(%[src_ptr])                  \n\t"
6462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* even 1. pixel */
6482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac3                           \n\t"
6492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[p1],       %[tp1]                         \n\t"
6502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[p2],       %[tp1]                         \n\t"
6512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[p3],       %[tp2]                         \n\t"
6522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p1],           %[vector1b]   \n\t"
6532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p2],           %[vector2b]   \n\t"
6542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p3],           %[vector3b]   \n\t"
6552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* even 2. pixel */
6572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac2                           \n\t"
6582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[p1],       %[tp2]                         \n\t"
6592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p2],           %[vector1b]   \n\t"
6602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p3],           %[vector2b]   \n\t"
6612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p1],           %[vector3b]   \n\t"
6622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "balign             %[tp2],      %[tp1],          3             \n\t"
6642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw                %[tn2],      3(%[src_ptr])                  \n\t"
6652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp1],    $ac3,            9             \n\t"
6662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* odd 1. pixel */
6682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac3                           \n\t"
6692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[n1],       %[tp2]                         \n\t"
6702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[n2],       %[tp2]                         \n\t"
6712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[n3],       %[tn2]                         \n\t"
6722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp3],    $ac2,            9             \n\t"
6732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n1],           %[vector1b]   \n\t"
6742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n2],           %[vector2b]   \n\t"
6752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n3],           %[vector3b]   \n\t"
6762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* odd 2. pixel */
6782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac2                           \n\t"
6792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[n1],       %[tn2]                         \n\t"
6802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n2],           %[vector1b]   \n\t"
6812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n3],           %[vector2b]   \n\t"
6822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n1],           %[vector3b]   \n\t"
6832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw                %[tp1],      6(%[src_ptr])                  \n\t"
6842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp2],    $ac3,            9             \n\t"
6852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac3                           \n\t"
6862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[p2],       %[tp1]                         \n\t"
6872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp4],    $ac2,            9             \n\t"
6882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn2] "=&r" (tn2),
6902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
6912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
6922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
6932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
6942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
6952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [vector4a] "r" (vector4a), [vector3b] "r" (vector3b),
6962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [src_ptr] "r" (src_ptr)
6972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        );
6982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
6992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* clamp and store results */
7002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[0] = cm[Temp1];
7012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[1] = cm[Temp2];
7022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[2] = cm[Temp3];
7032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[3] = cm[Temp4];
7042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* next 4 pixels */
7062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        __asm__ __volatile__ (
7072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* even 3. pixel */
7082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p3],           %[vector1b]   \n\t"
7092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p1],           %[vector2b]   \n\t"
7102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p2],           %[vector3b]   \n\t"
7112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* even 4. pixel */
7132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac2                           \n\t"
7142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[p4],       %[tp1]                         \n\t"
7152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p1],           %[vector1b]   \n\t"
7162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p2],           %[vector2b]   \n\t"
7172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p4],           %[vector3b]   \n\t"
7182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw                %[tn1],      7(%[src_ptr])                  \n\t"
7192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp1],    $ac3,            9             \n\t"
7202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* odd 3. pixel */
7222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac3                           \n\t"
7232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[n2],       %[tn1]                         \n\t"
7242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n3],           %[vector1b]   \n\t"
7252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n1],           %[vector2b]   \n\t"
7262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n2],           %[vector3b]   \n\t"
7272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp3],    $ac2,            9             \n\t"
7282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* odd 4. pixel */
7302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac2                           \n\t"
7312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[n4],       %[tn1]                         \n\t"
7322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n1],           %[vector1b]   \n\t"
7332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n2],           %[vector2b]   \n\t"
7342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n4],           %[vector3b]   \n\t"
7352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw                %[tp2],      10(%[src_ptr])                 \n\t"
7362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp2],    $ac3,            9             \n\t"
7372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac3                           \n\t"
7382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[p1],       %[tp2]                         \n\t"
7392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp4],    $ac2,            9             \n\t"
7402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [tn1] "=&r" (tn1), [tp2] "=&r" (tp2), [n2] "=&r" (n2),
7422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [p4] "=&r" (p4), [n4] "=&r" (n4),
7432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
7442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
7452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
7462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [tp1] "r" (tp1), [n1] "r" (n1), [p1] "r" (p1),
7472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [vector4a] "r" (vector4a), [p2] "r" (p2), [vector3b] "r" (vector3b),
7482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [p3] "r" (p3), [n3] "r" (n3), [src_ptr] "r" (src_ptr)
7492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        );
7502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* clamp and store results */
7522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[4] = cm[Temp1];
7532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[5] = cm[Temp2];
7542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[6] = cm[Temp3];
7552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[7] = cm[Temp4];
7562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* next 4 pixels */
7582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        __asm__ __volatile__ (
7592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* even 5. pixel */
7602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p2],           %[vector1b]   \n\t"
7612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p4],           %[vector2b]   \n\t"
7622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p1],           %[vector3b]   \n\t"
7632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* even 6. pixel */
7652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac2                           \n\t"
7662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[p3],       %[tp2]                         \n\t"
7672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p4],           %[vector1b]   \n\t"
7682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p1],           %[vector2b]   \n\t"
7692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p3],           %[vector3b]   \n\t"
7702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw                %[tn1],      11(%[src_ptr])                 \n\t"
7722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp1],    $ac3,            9             \n\t"
7732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* odd 5. pixel */
7752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac3                           \n\t"
7762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[n1],       %[tn1]                         \n\t"
7772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n2],           %[vector1b]   \n\t"
7782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n4],           %[vector2b]   \n\t"
7792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n1],           %[vector3b]   \n\t"
7802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp3],    $ac2,            9             \n\t"
7812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* odd 6. pixel */
7832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac2                           \n\t"
7842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[n3],       %[tn1]                         \n\t"
7852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n4],           %[vector1b]   \n\t"
7862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n1],           %[vector2b]   \n\t"
7872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n3],           %[vector3b]   \n\t"
7882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw                %[tp1],      14(%[src_ptr])                 \n\t"
7892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp2],    $ac3,            9             \n\t"
7902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac3                           \n\t"
7912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[p4],       %[tp1]                         \n\t"
7922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp4],    $ac2,            9             \n\t"
7932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
7942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [tn1] "=&r" (tn1), [tp1] "=&r" (tp1),
7952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [n1] "=&r" (n1), [p3] "=&r" (p3), [n3] "=&r" (n3),
7962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
7972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
7982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
7992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [tp2] "r" (tp2), [p2] "r" (p2), [n2] "r" (n2),
8002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [p4] "r" (p4), [n4] "r" (n4), [p1] "r" (p1), [src_ptr] "r" (src_ptr),
8012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [vector4a] "r" (vector4a), [vector3b] "r" (vector3b)
8022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        );
8032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* clamp and store results */
8052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[8] = cm[Temp1];
8062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[9] = cm[Temp2];
8072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[10] = cm[Temp3];
8082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr[11] = cm[Temp4];
8092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* next 4 pixels */
8112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        __asm__ __volatile__ (
8122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* even 7. pixel */
8132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p1],           %[vector1b]   \n\t"
8142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p3],           %[vector2b]   \n\t"
8152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[p4],           %[vector3b]   \n\t"
8162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* even 8. pixel */
8182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac2                           \n\t"
8192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[p2],       %[tp1]                         \n\t"
8202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p3],           %[vector1b]   \n\t"
8212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p4],           %[vector2b]   \n\t"
8222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[p2],           %[vector3b]   \n\t"
8231b362b15af34006e6a11974088a46d42b903418eJohann            "ulw                %[tn1],      15(%[src_ptr])                 \n\t"
8242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp1],    $ac3,            9             \n\t"
8252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* odd 7. pixel */
8272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac3                           \n\t"
8282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbr      %[n4],       %[tn1]                         \n\t"
8292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n1],           %[vector1b]   \n\t"
8302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n3],           %[vector2b]   \n\t"
8312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac3,        %[n4],           %[vector3b]   \n\t"
8322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp3],    $ac2,            9             \n\t"
8332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* odd 8. pixel */
8352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "mtlo               %[vector4a], $ac2                           \n\t"
8362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "preceu.ph.qbl      %[n2],       %[tn1]                         \n\t"
8372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n3],           %[vector1b]   \n\t"
8382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n4],           %[vector2b]   \n\t"
8392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "dpa.w.ph           $ac2,        %[n2],           %[vector3b]   \n\t"
8402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp2],    $ac3,            9             \n\t"
8412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "extp               %[Temp4],    $ac2,            9             \n\t"
8422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* clamp and store results */
8442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "lbux               %[tp1],      %[Temp1](%[cm])                \n\t"
8452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "lbux               %[tn1],      %[Temp2](%[cm])                \n\t"
8462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "lbux               %[p2],       %[Temp3](%[cm])                \n\t"
8472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sb                 %[tp1],      12(%[dst_ptr])                 \n\t"
8482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sb                 %[tn1],      13(%[dst_ptr])                 \n\t"
8492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "lbux               %[n2],       %[Temp4](%[cm])                \n\t"
8502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sb                 %[p2],       14(%[dst_ptr])                 \n\t"
8512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sb                 %[n2],       15(%[dst_ptr])                 \n\t"
8522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [tn1] "=&r" (tn1), [p2] "=&r" (p2), [n2] "=&r" (n2), [n4] "=&r" (n4),
8542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
8552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4)
8562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
8572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [tp1] "r" (tp1), [p4] "r" (p4), [n1] "r" (n1), [p1] "r" (p1),
8582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [vector4a] "r" (vector4a), [vector3b] "r" (vector3b), [p3] "r" (p3),
8592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [n3] "r" (n3), [src_ptr] "r" (src_ptr),
8602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [cm] "r" (cm), [dst_ptr] "r" (dst_ptr)
8612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        );
8622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8631b362b15af34006e6a11974088a46d42b903418eJohann        src_ptr += src_pixels_per_line;
8642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        dst_ptr += pitch;
8652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
8662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
8672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass16_0
8702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
8711b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
8721b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT output_ptr,
8732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int src_pixels_per_line
8742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
8752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
8762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int Temp1, Temp2, Temp3, Temp4;
8772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int i;
8782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* prefetch src_ptr data to cache memory */
8802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    prefetch_store(output_ptr + 32);
8812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* copy memory from src buffer to dst buffer */
8832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    for (i = 0; i < 7; i++)
8842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    {
8852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        __asm__ __volatile__ (
8862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp1],   0(%[src_ptr])                               \n\t"
8872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp2],   4(%[src_ptr])                               \n\t"
8882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp3],   8(%[src_ptr])                               \n\t"
8892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp4],   12(%[src_ptr])                              \n\t"
8902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp1],   0(%[output_ptr])                            \n\t"
8912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp2],   4(%[output_ptr])                            \n\t"
8922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp3],   8(%[output_ptr])                            \n\t"
8932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp4],   12(%[output_ptr])                           \n\t"
8942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "addu   %[src_ptr], %[src_ptr],        %[src_pixels_per_line]   \n\t"
8952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
8962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3),
8972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr)
8982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [src_pixels_per_line] "r" (src_pixels_per_line),
8992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [output_ptr] "r" (output_ptr)
9002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        );
9012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        __asm__ __volatile__ (
9032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp1],   0(%[src_ptr])                               \n\t"
9042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp2],   4(%[src_ptr])                               \n\t"
9052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp3],   8(%[src_ptr])                               \n\t"
9062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp4],   12(%[src_ptr])                              \n\t"
9072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp1],   16(%[output_ptr])                           \n\t"
9082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp2],   20(%[output_ptr])                           \n\t"
9092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp3],   24(%[output_ptr])                           \n\t"
9102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp4],   28(%[output_ptr])                           \n\t"
9112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "addu   %[src_ptr], %[src_ptr],        %[src_pixels_per_line]   \n\t"
9122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3),
9142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr)
9152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [src_pixels_per_line] "r" (src_pixels_per_line),
9162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [output_ptr] "r" (output_ptr)
9172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        );
9182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        __asm__ __volatile__ (
9202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp1],   0(%[src_ptr])                               \n\t"
9212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp2],   4(%[src_ptr])                               \n\t"
9222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp3],   8(%[src_ptr])                               \n\t"
9232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "ulw    %[Temp4],   12(%[src_ptr])                              \n\t"
9242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp1],   32(%[output_ptr])                           \n\t"
9252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp2],   36(%[output_ptr])                           \n\t"
9262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp3],   40(%[output_ptr])                           \n\t"
9272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "sw     %[Temp4],   44(%[output_ptr])                           \n\t"
9282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            "addu   %[src_ptr], %[src_ptr],        %[src_pixels_per_line]   \n\t"
9292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3),
9312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [Temp4] "=&r" (Temp4), [src_ptr] "+r" (src_ptr)
9322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            : [src_pixels_per_line] "r" (src_pixels_per_line),
9332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan              [output_ptr] "r" (output_ptr)
9342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        );
9352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        output_ptr += 48;
9372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
9382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
9392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_first_pass16_4tap
9422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
9431b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
9441b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT output_ptr,
9452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int src_pixels_per_line,
9462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int output_width,
9472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int output_height,
9482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int xoffset,
9492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int yoffset,
9501b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT dst_ptr,
9512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int pitch
9522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
9532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
9542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int i, j;
9552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int Temp1, Temp2, Temp3, Temp4;
9562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector4a;
9582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int vector1b, vector2b;
9592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int tp1, tp2, tp3, tn1;
9602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int p1, p2, p3;
9612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int n1, n2, n3;
9622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char *cm = ff_cropTbl + CROP_WIDTH;
9632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector4a = 64;
9652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector1b = sub_pel_filters_inv_tap_4[xoffset][0];
9672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector2b = sub_pel_filters_inv_tap_4[xoffset][1];
9682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* if (yoffset == 0) don't need temp buffer, data will be stored in dst_ptr */
9701b362b15af34006e6a11974088a46d42b903418eJohann    if (yoffset == 0)
9711b362b15af34006e6a11974088a46d42b903418eJohann    {
9722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        output_height -= 5;
9732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        src_ptr += (src_pixels_per_line + src_pixels_per_line);
9742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = output_height; i--;)
9762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
9772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
9782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw     %[tp3],   -1(%[src_ptr])               \n\t"
9792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [tp3] "=&r" (tp3)
9802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [src_ptr] "r" (src_ptr)
9812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
9822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* processing 4 adjacent pixels */
9842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            for (j = 0; j < 16; j += 4)
9852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            {
9862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* apply filter with vectors pairs */
9872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
9882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp2],      3(%[src_ptr])                    \n\t"
9892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "move             %[tp1],      %[tp3]                           \n\t"
9902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
9912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 1. pixel */
9922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                             \n\t"
9932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mthi             $0,          $ac3                             \n\t"
9942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "move             %[tp3],      %[tp2]                           \n\t"
9952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p1],       %[tp1]                           \n\t"
9962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p2],       %[tp1]                           \n\t"
9972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p3],       %[tp2]                           \n\t"
9982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p1],           %[vector1b]     \n\t"
9992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p2],           %[vector2b]     \n\t"
10002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 2. pixel */
10022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                             \n\t"
10032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mthi             $0,          $ac2                             \n\t"
10042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p2],           %[vector1b]     \n\t"
10052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p3],           %[vector2b]     \n\t"
10062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extr.w           %[Temp1],    $ac3,            7               \n\t"
10072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 1. pixel */
10092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tn1],      4(%[src_ptr])                    \n\t"
10102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "balign           %[tp2],      %[tp1],          3               \n\t"
10112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                             \n\t"
10122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mthi             $0,          $ac3                             \n\t"
10132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n1],       %[tp2]                           \n\t"
10142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n2],       %[tp2]                           \n\t"
10152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n3],       %[tn1]                           \n\t"
10162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extr.w           %[Temp3],    $ac2,            7               \n\t"
10172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n1],           %[vector1b]     \n\t"
10182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n2],           %[vector2b]     \n\t"
10192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 2. pixel */
10212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                             \n\t"
10222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mthi             $0,          $ac2                             \n\t"
10232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extr.w           %[Temp2],    $ac3,            7               \n\t"
10242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n2],           %[vector1b]     \n\t"
10252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n3],           %[vector2b]     \n\t"
10262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extr.w           %[Temp4],    $ac2,            7               \n\t"
10272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* clamp and store results */
10292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tp1],      %[Temp1](%[cm])                  \n\t"
10302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tn1],      %[Temp2](%[cm])                  \n\t"
10312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tp2],      %[Temp3](%[cm])                  \n\t"
10322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tp1],      0(%[dst_ptr])                    \n\t"
10332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tn1],      1(%[dst_ptr])                    \n\t"
10342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[n2],       %[Temp4](%[cm])                  \n\t"
10352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tp2],      2(%[dst_ptr])                    \n\t"
10362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[n2],       3(%[dst_ptr])                    \n\t"
10372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tp3] "=&r" (tp3),
10392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [tn1] "=&r" (tn1), [p1] "=&r" (p1), [p2] "=&r" (p2),
10402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
10412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [p3] "=&r" (p3),
10422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
10432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
10442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [cm] "r" (cm), [dst_ptr] "r" (dst_ptr),
10452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [src_ptr] "r" (src_ptr)
10462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
10472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10481b362b15af34006e6a11974088a46d42b903418eJohann                src_ptr += 4;
10492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            }
10502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* Next row... */
10521b362b15af34006e6a11974088a46d42b903418eJohann            src_ptr += src_pixels_per_line - 16;
10532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            dst_ptr += pitch;
10542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
10552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
10561b362b15af34006e6a11974088a46d42b903418eJohann    else
10571b362b15af34006e6a11974088a46d42b903418eJohann    {
10582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = output_height; i--;)
10592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
10602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* processing 4 adjacent pixels */
10612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            for (j = 0; j < 16; j += 4)
10622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            {
10632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* apply filter with vectors pairs */
10642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
10652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp1],      -1(%[src_ptr])                   \n\t"
10662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tp2],      3(%[src_ptr])                    \n\t"
10672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 1. pixel */
10692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                             \n\t"
10702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mthi             $0,          $ac3                             \n\t"
10712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p1],       %[tp1]                           \n\t"
10722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[p2],       %[tp1]                           \n\t"
10732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[p3],       %[tp2]                           \n\t"
10742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p1],           %[vector1b]     \n\t"
10752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[p2],           %[vector2b]     \n\t"
10762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* even 2. pixel */
10782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                             \n\t"
10792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mthi             $0,          $ac2                             \n\t"
10802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p2],           %[vector1b]     \n\t"
10812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[p3],           %[vector2b]     \n\t"
10822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extr.w           %[Temp1],    $ac3,            7               \n\t"
10832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 1. pixel */
10852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "ulw              %[tn1],      4(%[src_ptr])                    \n\t"
10862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "balign           %[tp2],      %[tp1],          3               \n\t"
10872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac3                             \n\t"
10882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mthi             $0,          $ac3                             \n\t"
10892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n1],       %[tp2]                           \n\t"
10902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbl    %[n2],       %[tp2]                           \n\t"
10912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "preceu.ph.qbr    %[n3],       %[tn1]                           \n\t"
10922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extr.w           %[Temp3],    $ac2,            7               \n\t"
10932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n1],           %[vector1b]     \n\t"
10942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac3,        %[n2],           %[vector2b]     \n\t"
10952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
10962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* odd 2. pixel */
10972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo             %[vector4a], $ac2                             \n\t"
10982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mthi             $0,          $ac2                             \n\t"
10992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extr.w           %[Temp2],    $ac3,            7               \n\t"
11002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n2],           %[vector1b]     \n\t"
11012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpa.w.ph         $ac2,        %[n3],           %[vector2b]     \n\t"
11022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extr.w           %[Temp4],    $ac2,            7               \n\t"
11032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    /* clamp and store results */
11052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tp1],      %[Temp1](%[cm])                  \n\t"
11062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tn1],      %[Temp2](%[cm])                  \n\t"
11072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[tp2],      %[Temp3](%[cm])                  \n\t"
11082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tp1],      0(%[output_ptr])                 \n\t"
11092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tn1],      1(%[output_ptr])                 \n\t"
11102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbux             %[n2],       %[Temp4](%[cm])                  \n\t"
11112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[tp2],      2(%[output_ptr])                 \n\t"
11122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "sb               %[n2],       3(%[output_ptr])                 \n\t"
11132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [tp1] "=&r" (tp1), [tp2] "=&r" (tp2), [tn1] "=&r" (tn1),
11152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3),
11162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [n1] "=&r" (n1), [n2] "=&r" (n2), [n3] "=&r" (n3),
11172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
11182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4)
11192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
11202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [cm] "r" (cm),
11212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [output_ptr] "r" (output_ptr), [src_ptr] "r" (src_ptr)
11222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
11232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11241b362b15af34006e6a11974088a46d42b903418eJohann                src_ptr += 4;
11252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            }
11262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* next row... */
11281b362b15af34006e6a11974088a46d42b903418eJohann            src_ptr += src_pixels_per_line;
11292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_width;
11302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
11312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
11322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
11332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_second_pass4
11362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
11371b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
11381b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT output_ptr,
11392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int output_pitch,
11402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int yoffset
11412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
11422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
11432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int i;
11442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int Temp1, Temp2, Temp3, Temp4;
11462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector1b, vector2b, vector3b, vector4a;
11472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_l2;
11492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_l1;
11502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_0;
11512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r1;
11522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r2;
11532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r3;
11542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char *cm = ff_cropTbl + CROP_WIDTH;
11562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector4a = 64;
11582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* load filter coefficients */
11602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector1b = sub_pel_filterss[yoffset][0];
11612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector2b = sub_pel_filterss[yoffset][2];
11622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector3b = sub_pel_filterss[yoffset][1];
11632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11641b362b15af34006e6a11974088a46d42b903418eJohann    if (vector1b)
11651b362b15af34006e6a11974088a46d42b903418eJohann    {
11662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* 6 tap filter */
11672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = 2; i--;)
11692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
11701b362b15af34006e6a11974088a46d42b903418eJohann            /* prefetch src_ptr data to cache memory */
11711b362b15af34006e6a11974088a46d42b903418eJohann            prefetch_load(src_ptr);
11722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* do not allow compiler to reorder instructions */
11742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
11752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                ".set noreorder                                                 \n\t"
11762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                :
11772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                :
11782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
11792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* apply filter with vectors pairs */
11812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
11822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -8(%[src_ptr])                  \n\t"
11832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -4(%[src_ptr])                  \n\t"
11842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   0(%[src_ptr])                   \n\t"
11852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  4(%[src_ptr])                   \n\t"
11862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  8(%[src_ptr])                   \n\t"
11872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  12(%[src_ptr])                  \n\t"
11882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
11892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
11912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
11922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
11932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_l2],  %[vector1b]     \n\t"
11942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
11952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
11962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
11972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -7(%[src_ptr])                  \n\t"
11982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -3(%[src_ptr])                  \n\t"
11992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   1(%[src_ptr])                   \n\t"
12002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  5(%[src_ptr])                   \n\t"
12012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  9(%[src_ptr])                   \n\t"
12022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  13(%[src_ptr])                  \n\t"
12032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
12042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp1],       $ac2,           9               \n\t"
12052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
12072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
12082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
12092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_l2],  %[vector1b]     \n\t"
12102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
12112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
12122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -6(%[src_ptr])                  \n\t"
12142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -2(%[src_ptr])                  \n\t"
12152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   2(%[src_ptr])                   \n\t"
12162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  6(%[src_ptr])                   \n\t"
12172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  10(%[src_ptr])                  \n\t"
12182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  14(%[src_ptr])                  \n\t"
12192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
12202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp2],       $ac3,           9               \n\t"
12212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
12232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
12242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
12252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_l2],  %[vector1b]     \n\t"
12262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
12272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
12282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -5(%[src_ptr])                  \n\t"
12302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -1(%[src_ptr])                  \n\t"
12312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   3(%[src_ptr])                   \n\t"
12322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  7(%[src_ptr])                   \n\t"
12332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  11(%[src_ptr])                  \n\t"
12342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  15(%[src_ptr])                  \n\t"
12352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
12362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp3],       $ac0,           9               \n\t"
12372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
12392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
12402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
12412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_l2],  %[vector1b]     \n\t"
12422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
12432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
12442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp4],       $ac1,           9               \n\t"
12452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
12472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4),
12482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
12492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
12502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3)
12512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
12522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
12532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr] "r" (src_ptr)
12542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
12552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* clamp and store results */
12572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[0] = cm[Temp1];
12582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[1] = cm[Temp2];
12592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[2] = cm[Temp3];
12602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[3] = cm[Temp4];
12612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_pitch;
12632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* apply filter with vectors pairs */
12652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
12662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -4(%[src_ptr])                  \n\t"
12672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  0(%[src_ptr])                   \n\t"
12682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   4(%[src_ptr])                   \n\t"
12692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  8(%[src_ptr])                   \n\t"
12702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  12(%[src_ptr])                  \n\t"
12712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  16(%[src_ptr])                  \n\t"
12722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
12732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
12742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
12752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
12762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_l2],  %[vector1b]     \n\t"
12772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
12782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
12792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -3(%[src_ptr])                  \n\t"
12812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  1(%[src_ptr])                   \n\t"
12822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   5(%[src_ptr])                   \n\t"
12832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  9(%[src_ptr])                   \n\t"
12842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  13(%[src_ptr])                  \n\t"
12852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  17(%[src_ptr])                  \n\t"
12862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
12872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp1],       $ac2,           9               \n\t"
12882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
12902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
12912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
12922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_l2],  %[vector1b]     \n\t"
12932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
12942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
12952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
12962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -2(%[src_ptr])                  \n\t"
12972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  2(%[src_ptr])                   \n\t"
12982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   6(%[src_ptr])                   \n\t"
12992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  10(%[src_ptr])                  \n\t"
13002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  14(%[src_ptr])                  \n\t"
13012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  18(%[src_ptr])                  \n\t"
13022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
13032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp2],       $ac3,           9               \n\t"
13042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
13062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
13072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
13082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_l2],  %[vector1b]     \n\t"
13092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
13102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
13112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -1(%[src_ptr])                  \n\t"
13132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  3(%[src_ptr])                   \n\t"
13142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   7(%[src_ptr])                   \n\t"
13152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  11(%[src_ptr])                  \n\t"
13162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  15(%[src_ptr])                  \n\t"
13172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  19(%[src_ptr])                  \n\t"
13182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
13192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp3],       $ac0,           9               \n\t"
13202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
13222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
13232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
13242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_l2],  %[vector1b]     \n\t"
13252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
13262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
13272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp4],       $ac1,           9               \n\t"
13282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
13302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4),
13312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
13322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
13332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3)
13342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
13352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
13362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr] "r" (src_ptr)
13372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
13382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* clamp and store results */
13402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[0] = cm[Temp1];
13412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[1] = cm[Temp2];
13422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[2] = cm[Temp3];
13432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[3] = cm[Temp4];
13442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr += 8;
13462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_pitch;
13472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
13482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
13491b362b15af34006e6a11974088a46d42b903418eJohann    else
13501b362b15af34006e6a11974088a46d42b903418eJohann    {
13512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* 4 tap filter */
13522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* prefetch src_ptr data to cache memory */
13542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        prefetch_load(src_ptr);
13552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = 2; i--;)
13572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
13582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* do not allow compiler to reorder instructions */
13592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
13601b362b15af34006e6a11974088a46d42b903418eJohann                ".set noreorder                                                 \n\t"
13612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                :
13622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                :
13632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
13642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* apply filter with vectors pairs */
13662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
13672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -4(%[src_ptr])                  \n\t"
13682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   0(%[src_ptr])                   \n\t"
13692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  4(%[src_ptr])                   \n\t"
13702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  8(%[src_ptr])                   \n\t"
13712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
13722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
13732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
13742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
13752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
13762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -3(%[src_ptr])                  \n\t"
13782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   1(%[src_ptr])                   \n\t"
13792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  5(%[src_ptr])                   \n\t"
13802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  9(%[src_ptr])                   \n\t"
13812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
13822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp1],       $ac2,           9               \n\t"
13832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
13852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
13862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
13872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
13882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -2(%[src_ptr])                  \n\t"
13902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   2(%[src_ptr])                   \n\t"
13912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  6(%[src_ptr])                   \n\t"
13922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  10(%[src_ptr])                  \n\t"
13932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
13942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp2],       $ac3,           9               \n\t"
13952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
13962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
13972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
13982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
13992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
14002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -1(%[src_ptr])                  \n\t"
14022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   3(%[src_ptr])                   \n\t"
14032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  7(%[src_ptr])                   \n\t"
14042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  11(%[src_ptr])                  \n\t"
14052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
14062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp3],       $ac0,           9               \n\t"
14072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
14082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
14092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
14102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
14112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp4],       $ac1,           9               \n\t"
14122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
14142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4),
14152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
14162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
14172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
14182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
14192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
14202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* clamp and store results */
14222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[0] = cm[Temp1];
14232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[1] = cm[Temp2];
14242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[2] = cm[Temp3];
14252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[3] = cm[Temp4];
14262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_pitch;
14282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* apply filter with vectors pairs */
14302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
14312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  0(%[src_ptr])                   \n\t"
14322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   4(%[src_ptr])                   \n\t"
14332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  8(%[src_ptr])                   \n\t"
14342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  12(%[src_ptr])                  \n\t"
14352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
14362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
14372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
14382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
14392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
14402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  1(%[src_ptr])                   \n\t"
14422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   5(%[src_ptr])                   \n\t"
14432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  9(%[src_ptr])                   \n\t"
14442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  13(%[src_ptr])                  \n\t"
14452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
14462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp1],       $ac2,           9               \n\t"
14472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
14492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
14502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
14512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
14522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  2(%[src_ptr])                   \n\t"
14542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   6(%[src_ptr])                   \n\t"
14552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  10(%[src_ptr])                  \n\t"
14562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  14(%[src_ptr])                  \n\t"
14572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
14582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp2],       $ac3,           9               \n\t"
14592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
14612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
14622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
14632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
14642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  3(%[src_ptr])                   \n\t"
14662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   7(%[src_ptr])                   \n\t"
14672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  11(%[src_ptr])                  \n\t"
14682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  15(%[src_ptr])                  \n\t"
14692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
14702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp3],       $ac0,           9               \n\t"
14712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
14722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
14732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
14742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
14752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp4],       $ac1,           9               \n\t"
14762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
14782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp3] "=&r" (Temp3), [Temp4] "=r" (Temp4),
14792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
14802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
14812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
14822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
14832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
14842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* clamp and store results */
14862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[0] = cm[Temp1];
14872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[1] = cm[Temp2];
14882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[2] = cm[Temp3];
14892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[3] = cm[Temp4];
14902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr += 8;
14922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_pitch;
14932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
14942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
14952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
14962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
14982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_second_pass_8
14992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
15001b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
15011b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT output_ptr,
15022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int output_pitch,
15032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int output_height,
15042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int output_width,
15052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int yoffset
15062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
15072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
15081b362b15af34006e6a11974088a46d42b903418eJohann    unsigned int i;
15092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8;
15112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector1b, vector2b, vector3b, vector4a;
15122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_l2;
15142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_l1;
15152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_0;
15162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r1;
15172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r2;
15182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r3;
15192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char *cm = ff_cropTbl + CROP_WIDTH;
15202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector4a = 64;
15222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector1b = sub_pel_filterss[yoffset][0];
15242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector2b = sub_pel_filterss[yoffset][2];
15252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector3b = sub_pel_filterss[yoffset][1];
15262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15271b362b15af34006e6a11974088a46d42b903418eJohann    if (vector1b)
15281b362b15af34006e6a11974088a46d42b903418eJohann    {
15292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* 6 tap filter */
15302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* prefetch src_ptr data to cache memory */
15322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        prefetch_load(src_ptr);
15332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = output_height; i--;)
15352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
15362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* apply filter with vectors pairs */
15372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
15382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -16(%[src_ptr])                 \n\t"
15392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -8(%[src_ptr])                  \n\t"
15402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   0(%[src_ptr])                   \n\t"
15412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  8(%[src_ptr])                   \n\t"
15422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  16(%[src_ptr])                  \n\t"
15432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  24(%[src_ptr])                  \n\t"
15442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
15452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
15472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
15482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
15492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_l2],  %[vector1b]     \n\t"
15502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
15512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
15522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -15(%[src_ptr])                 \n\t"
15542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -7(%[src_ptr])                  \n\t"
15552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   1(%[src_ptr])                   \n\t"
15562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  9(%[src_ptr])                   \n\t"
15572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  17(%[src_ptr])                  \n\t"
15582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  25(%[src_ptr])                  \n\t"
15592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
15602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp1],       $ac2,           9               \n\t"
15612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
15632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
15642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
15652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_l2],  %[vector1b]     \n\t"
15662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
15672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
15682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -14(%[src_ptr])                 \n\t"
15702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -6(%[src_ptr])                  \n\t"
15712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   2(%[src_ptr])                   \n\t"
15722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  10(%[src_ptr])                  \n\t"
15732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  18(%[src_ptr])                  \n\t"
15742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  26(%[src_ptr])                  \n\t"
15752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
15762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp2],       $ac3,           9               \n\t"
15772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
15792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
15802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
15812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_l2],  %[vector1b]     \n\t"
15822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
15832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
15842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -13(%[src_ptr])                 \n\t"
15862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -5(%[src_ptr])                  \n\t"
15872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   3(%[src_ptr])                   \n\t"
15882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  11(%[src_ptr])                  \n\t"
15892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  19(%[src_ptr])                  \n\t"
15902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  27(%[src_ptr])                  \n\t"
15912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
15922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp3],       $ac0,           9               \n\t"
15932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
15942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
15952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
15962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
15972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_l2],  %[vector1b]     \n\t"
15982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
15992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
16002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16011b362b15af34006e6a11974088a46d42b903418eJohann                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2), [Temp3] "=&r" (Temp3),
16021b362b15af34006e6a11974088a46d42b903418eJohann                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
16031b362b15af34006e6a11974088a46d42b903418eJohann                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
16041b362b15af34006e6a11974088a46d42b903418eJohann                  [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3)
16051b362b15af34006e6a11974088a46d42b903418eJohann                : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
16061b362b15af34006e6a11974088a46d42b903418eJohann                  [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
16071b362b15af34006e6a11974088a46d42b903418eJohann                  [src_ptr] "r" (src_ptr)
16082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
16092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* apply filter with vectors pairs */
16112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
16122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -12(%[src_ptr])                 \n\t"
16132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -4(%[src_ptr])                  \n\t"
16142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   4(%[src_ptr])                   \n\t"
16152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  12(%[src_ptr])                  \n\t"
16162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  20(%[src_ptr])                  \n\t"
16172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  28(%[src_ptr])                  \n\t"
16182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
16192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
16212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
16222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
16232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_l2],  %[vector1b]     \n\t"
16242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
16252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
16262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp4],       $ac1,           9               \n\t"
16272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -11(%[src_ptr])                 \n\t"
16292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -3(%[src_ptr])                  \n\t"
16302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   5(%[src_ptr])                   \n\t"
16312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  13(%[src_ptr])                  \n\t"
16322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  21(%[src_ptr])                  \n\t"
16332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  29(%[src_ptr])                  \n\t"
16342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
16352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp5],       $ac2,           9               \n\t"
16362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
16382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
16392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
16402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_l2],  %[vector1b]     \n\t"
16412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
16422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
16432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -10(%[src_ptr])                 \n\t"
16452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -2(%[src_ptr])                  \n\t"
16462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   6(%[src_ptr])                   \n\t"
16472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  14(%[src_ptr])                  \n\t"
16482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  22(%[src_ptr])                  \n\t"
16492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  30(%[src_ptr])                  \n\t"
16502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
16512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp6],       $ac3,           9               \n\t"
16522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
16542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
16552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
16562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_l2],  %[vector1b]     \n\t"
16572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
16582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
16592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -9(%[src_ptr])                  \n\t"
16612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -1(%[src_ptr])                  \n\t"
16622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   7(%[src_ptr])                   \n\t"
16632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  15(%[src_ptr])                  \n\t"
16642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  23(%[src_ptr])                  \n\t"
16652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  31(%[src_ptr])                  \n\t"
16662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
16672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp7],       $ac0,           9               \n\t"
16682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
16702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
16712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
16722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_l2],  %[vector1b]     \n\t"
16732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
16742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
16752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp8],       $ac1,           9               \n\t"
16762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp4] "=&r" (Temp4), [Temp5] "=&r" (Temp5),
16782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp6] "=&r" (Temp6), [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8),
16792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
16802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
16812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3)
16822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
16832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
16842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr] "r" (src_ptr)
16852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
16862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* clamp and store results */
16882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[0] = cm[Temp1];
16892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[1] = cm[Temp2];
16902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[2] = cm[Temp3];
16912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[3] = cm[Temp4];
16922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[4] = cm[Temp5];
16932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[5] = cm[Temp6];
16942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[6] = cm[Temp7];
16952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[7] = cm[Temp8];
16962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
16972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr += 8;
16982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_pitch;
16992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
17002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
17011b362b15af34006e6a11974088a46d42b903418eJohann    else
17021b362b15af34006e6a11974088a46d42b903418eJohann    {
17032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* 4 tap filter */
17042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* prefetch src_ptr data to cache memory */
17062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        prefetch_load(src_ptr);
17072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = output_height; i--;)
17092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
17102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
17112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -8(%[src_ptr])                  \n\t"
17122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   0(%[src_ptr])                   \n\t"
17132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  8(%[src_ptr])                   \n\t"
17142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  16(%[src_ptr])                  \n\t"
17152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
17162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
17172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
17182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
17192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
17202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
17222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
17232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
17242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
17252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
17262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
17282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -7(%[src_ptr])                  \n\t"
17292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   1(%[src_ptr])                   \n\t"
17302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  9(%[src_ptr])                   \n\t"
17312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  17(%[src_ptr])                  \n\t"
17322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
17332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
17342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
17352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
17362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
17372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp1],       $ac2,           9               \n\t"
17382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=r" (Temp1),
17402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
17412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
17422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
17432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
17442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
17452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_l1 = src_ptr[-6];
17472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_0  = src_ptr[2];
17482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r1 = src_ptr[10];
17492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r2 = src_ptr[18];
17502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
17522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
17532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
17542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
17552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
17562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
17572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp2],       $ac3,           9               \n\t"
17582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp2] "=r" (Temp2)
17602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
17612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
17622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
17632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a)
17642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
17652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_l1 = src_ptr[-5];
17672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_0  = src_ptr[3];
17682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r1 = src_ptr[11];
17692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r2 = src_ptr[19];
17702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
17722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
17732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
17742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
17752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
17762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
17772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp3],       $ac0,           9               \n\t"
17782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp3] "=r" (Temp3)
17802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
17812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
17822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
17832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a)
17842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
17852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_l1 = src_ptr[-4];
17872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_0  = src_ptr[4];
17882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r1 = src_ptr[12];
17892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r2 = src_ptr[20];
17902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
17922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
17932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
17942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
17952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
17962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
17972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp4],       $ac1,           9               \n\t"
17982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
17992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp4] "=r" (Temp4)
18002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
18012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
18022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
18032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a)
18042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
18052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_l1 = src_ptr[-3];
18072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_0  = src_ptr[5];
18082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r1 = src_ptr[13];
18092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r2 = src_ptr[21];
18102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
18122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
18132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
18142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
18152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
18162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
18172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp5],       $ac2,           9               \n\t"
18182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp5] "=&r" (Temp5)
18202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
18212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
18222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
18232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a)
18242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
18252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_l1 = src_ptr[-2];
18272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_0  = src_ptr[6];
18282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r1 = src_ptr[14];
18292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r2 = src_ptr[22];
18302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
18322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
18332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
18342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
18352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
18362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
18372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp6],       $ac3,           9               \n\t"
18382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp6] "=r" (Temp6)
18402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
18412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
18422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
18432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a)
18442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
18452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_l1 = src_ptr[-1];
18472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_0  = src_ptr[7];
18482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r1 = src_ptr[15];
18492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr_r2 = src_ptr[23];
18502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
18522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
18532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
18542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
18552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
18562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
18572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp7],       $ac0,           9               \n\t"
18582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp8],       $ac1,           9               \n\t"
18592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8)
18612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
18622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "r" (src_ptr_l1), [src_ptr_0] "r" (src_ptr_0),
18632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "r" (src_ptr_r1), [src_ptr_r2] "r" (src_ptr_r2),
18642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector4a] "r" (vector4a)
18652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
18662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* clamp and store results */
18682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[0] = cm[Temp1];
18692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[1] = cm[Temp2];
18702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[2] = cm[Temp3];
18712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[3] = cm[Temp4];
18722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[4] = cm[Temp5];
18732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[5] = cm[Temp6];
18742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[6] = cm[Temp7];
18752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[7] = cm[Temp8];
18762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr += 8;
18782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_pitch;
18792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
18802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
18812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
18822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjanvoid vp8_filter_block2d_second_pass161
18852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
18861b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
18871b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT output_ptr,
18882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int output_pitch,
18892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    const unsigned short *vp8_filter
18902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
18912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
18922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int i, j;
18932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8;
18952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector4a;
18962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int vector1b, vector2b, vector3b;
18972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
18982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_l2;
18992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_l1;
19002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_0;
19012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r1;
19022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r2;
19032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char src_ptr_r3;
19042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned char *cm = ff_cropTbl + CROP_WIDTH;
19052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector4a = 64;
19072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector1b = vp8_filter[0];
19092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector2b = vp8_filter[2];
19102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    vector3b = vp8_filter[1];
19112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19121b362b15af34006e6a11974088a46d42b903418eJohann    if (vector1b == 0)
19131b362b15af34006e6a11974088a46d42b903418eJohann    {
19142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* 4 tap filter */
19152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* prefetch src_ptr data to cache memory */
19172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        prefetch_load(src_ptr + 16);
19182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = 16; i--;)
19202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
19212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* unrolling for loop */
19222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            for (j = 0; j < 16; j += 8)
19232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            {
19242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* apply filter with vectors pairs */
19252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                __asm__ __volatile__ (
19262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_l1],  -16(%[src_ptr])                 \n\t"
19272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_0],   0(%[src_ptr])                   \n\t"
19282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r1],  16(%[src_ptr])                  \n\t"
19292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r2],  32(%[src_ptr])                  \n\t"
19302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo           %[vector4a],    $ac2                            \n\t"
19312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
19322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
19332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
19342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
19352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_l1],  -15(%[src_ptr])                 \n\t"
19372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_0],   1(%[src_ptr])                   \n\t"
19382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r1],  17(%[src_ptr])                  \n\t"
19392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r2],  33(%[src_ptr])                  \n\t"
19402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo           %[vector4a],    $ac3                            \n\t"
19412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp           %[Temp1],       $ac2,           9               \n\t"
19422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
19442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
19452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
19462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
19472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_l1],  -14(%[src_ptr])                 \n\t"
19492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_0],   2(%[src_ptr])                   \n\t"
19502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r1],  18(%[src_ptr])                  \n\t"
19512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r2],  34(%[src_ptr])                  \n\t"
19522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo           %[vector4a],    $ac1                            \n\t"
19532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp           %[Temp2],       $ac3,           9               \n\t"
19542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
19562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
19572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
19582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
19592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_l1],  -13(%[src_ptr])                 \n\t"
19612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_0],   3(%[src_ptr])                   \n\t"
19622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r1],  19(%[src_ptr])                  \n\t"
19632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r2],  35(%[src_ptr])                  \n\t"
19642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo           %[vector4a],    $ac3                            \n\t"
19652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp           %[Temp3],       $ac1,           9               \n\t"
19662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
19682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
19692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
19702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
19712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_l1],  -12(%[src_ptr])                 \n\t"
19732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_0],   4(%[src_ptr])                   \n\t"
19742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r1],  20(%[src_ptr])                  \n\t"
19752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r2],  36(%[src_ptr])                  \n\t"
19762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo           %[vector4a],    $ac2                            \n\t"
19772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp           %[Temp4],       $ac3,           9               \n\t"
19782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
19802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
19812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
19822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
19832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_l1],  -11(%[src_ptr])                 \n\t"
19852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_0],   5(%[src_ptr])                   \n\t"
19862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r1],  21(%[src_ptr])                  \n\t"
19872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r2],  37(%[src_ptr])                  \n\t"
19882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo           %[vector4a],    $ac3                            \n\t"
19892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp           %[Temp5],       $ac2,           9               \n\t"
19902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
19922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
19932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
19942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
19952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
19962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_l1],  -10(%[src_ptr])                 \n\t"
19972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_0],   6(%[src_ptr])                   \n\t"
19982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r1],  22(%[src_ptr])                  \n\t"
19992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r2],  38(%[src_ptr])                  \n\t"
20002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo           %[vector4a],    $ac1                            \n\t"
20012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp           %[Temp6],       $ac3,           9               \n\t"
20022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
20042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
20052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
20062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
20072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_l1],  -9(%[src_ptr])                  \n\t"
20092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_0],   7(%[src_ptr])                   \n\t"
20102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r1],  23(%[src_ptr])                  \n\t"
20112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "lbu            %[src_ptr_r2],  39(%[src_ptr])                  \n\t"
20122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "mtlo           %[vector4a],    $ac3                            \n\t"
20132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp           %[Temp7],       $ac1,           9               \n\t"
20142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
20162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
20172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
20182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
20192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    "extp           %[Temp8],       $ac3,           9               \n\t"
20202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
20222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4),
20232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6),
20242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8),
20252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
20262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2)
20272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                    : [vector2b] "r" (vector2b), [vector3b] "r" (vector3b),
20282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                      [vector4a] "r" (vector4a), [src_ptr] "r" (src_ptr)
20292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                );
20302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                /* clamp and store results */
20322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                output_ptr[j] = cm[Temp1];
20331b362b15af34006e6a11974088a46d42b903418eJohann                output_ptr[j + 1] = cm[Temp2];
20341b362b15af34006e6a11974088a46d42b903418eJohann                output_ptr[j + 2] = cm[Temp3];
20351b362b15af34006e6a11974088a46d42b903418eJohann                output_ptr[j + 3] = cm[Temp4];
20361b362b15af34006e6a11974088a46d42b903418eJohann                output_ptr[j + 4] = cm[Temp5];
20371b362b15af34006e6a11974088a46d42b903418eJohann                output_ptr[j + 5] = cm[Temp6];
20381b362b15af34006e6a11974088a46d42b903418eJohann                output_ptr[j + 6] = cm[Temp7];
20391b362b15af34006e6a11974088a46d42b903418eJohann                output_ptr[j + 7] = cm[Temp8];
20402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                src_ptr += 8;
20422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            }
20432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_pitch;
20452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
20462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
20471b362b15af34006e6a11974088a46d42b903418eJohann    else
20481b362b15af34006e6a11974088a46d42b903418eJohann    {
20492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* 6 tap filter */
20502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* prefetch src_ptr data to cache memory */
20522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        prefetch_load(src_ptr + 16);
20532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* unroll for loop */
20552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        for (i = 16; i--;)
20562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        {
20572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* apply filter with vectors pairs */
20582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
20592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -32(%[src_ptr])                 \n\t"
20602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -16(%[src_ptr])                 \n\t"
20612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   0(%[src_ptr])                   \n\t"
20622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  16(%[src_ptr])                  \n\t"
20632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  32(%[src_ptr])                  \n\t"
20642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  48(%[src_ptr])                  \n\t"
20652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
20662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
20682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
20692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
20702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_l2],  %[vector1b]     \n\t"
20712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
20722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
20732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -31(%[src_ptr])                 \n\t"
20752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -15(%[src_ptr])                 \n\t"
20762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   1(%[src_ptr])                   \n\t"
20772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  17(%[src_ptr])                  \n\t"
20782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  33(%[src_ptr])                  \n\t"
20792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  49(%[src_ptr])                  \n\t"
20802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
20812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp1],       $ac2,           9               \n\t"
20822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
20842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
20852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
20862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_l2],  %[vector1b]     \n\t"
20872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
20882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
20892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -30(%[src_ptr])                 \n\t"
20912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -14(%[src_ptr])                 \n\t"
20922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   2(%[src_ptr])                   \n\t"
20932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  18(%[src_ptr])                  \n\t"
20942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  34(%[src_ptr])                  \n\t"
20952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  50(%[src_ptr])                  \n\t"
20962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
20972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp2],       $ac0,           9               \n\t"
20982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
20992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
21002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
21012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
21022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_l2],  %[vector1b]     \n\t"
21032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
21042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
21052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -29(%[src_ptr])                 \n\t"
21072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -13(%[src_ptr])                 \n\t"
21082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   3(%[src_ptr])                   \n\t"
21092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  19(%[src_ptr])                  \n\t"
21102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  35(%[src_ptr])                  \n\t"
21112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  51(%[src_ptr])                  \n\t"
21122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
21132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp3],       $ac1,           9               \n\t"
21142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
21162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
21172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
21182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_l2],  %[vector1b]     \n\t"
21192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
21202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
21212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -28(%[src_ptr])                 \n\t"
21232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -12(%[src_ptr])                 \n\t"
21242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   4(%[src_ptr])                   \n\t"
21252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  20(%[src_ptr])                  \n\t"
21262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  36(%[src_ptr])                  \n\t"
21272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  52(%[src_ptr])                  \n\t"
21282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
21292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp4],       $ac3,           9               \n\t"
21302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
21322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
21332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
21342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_l2],  %[vector1b]     \n\t"
21352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
21362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
21372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -27(%[src_ptr])                 \n\t"
21392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -11(%[src_ptr])                 \n\t"
21402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   5(%[src_ptr])                   \n\t"
21412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  21(%[src_ptr])                  \n\t"
21422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  37(%[src_ptr])                  \n\t"
21432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  53(%[src_ptr])                  \n\t"
21442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
21452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp5],       $ac2,           9               \n\t"
21462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
21482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
21492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
21502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_l2],  %[vector1b]     \n\t"
21512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
21522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
21532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -26(%[src_ptr])                 \n\t"
21552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -10(%[src_ptr])                 \n\t"
21562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   6(%[src_ptr])                   \n\t"
21572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  22(%[src_ptr])                  \n\t"
21582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  38(%[src_ptr])                  \n\t"
21592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  54(%[src_ptr])                  \n\t"
21602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
21612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp6],       $ac0,           9               \n\t"
21622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
21642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
21652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
21662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_l2],  %[vector1b]     \n\t"
21672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
21682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
21692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -25(%[src_ptr])                 \n\t"
21712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -9(%[src_ptr])                  \n\t"
21722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   7(%[src_ptr])                   \n\t"
21732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  23(%[src_ptr])                  \n\t"
21742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  39(%[src_ptr])                  \n\t"
21752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  55(%[src_ptr])                  \n\t"
21762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
21772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp7],       $ac1,           9               \n\t"
21782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
21802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
21812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
21822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_l2],  %[vector1b]     \n\t"
21832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
21842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
21852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp8],       $ac3,           9               \n\t"
21862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
21882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4),
21892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6),
21902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8),
21912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
21922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
21932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l2] "=&r" (src_ptr_l2),[src_ptr_r3] "=&r" (src_ptr_r3)
21942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
21952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
21962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr] "r" (src_ptr)
21972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
21982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
21992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* clamp and store results */
22002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[0] = cm[Temp1];
22012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[1] = cm[Temp2];
22022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[2] = cm[Temp3];
22032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[3] = cm[Temp4];
22042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[4] = cm[Temp5];
22052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[5] = cm[Temp6];
22062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[6] = cm[Temp7];
22072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[7] = cm[Temp8];
22082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* apply filter with vectors pairs */
22102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
22112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -24(%[src_ptr])                 \n\t"
22122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -8(%[src_ptr])                  \n\t"
22132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   8(%[src_ptr])                   \n\t"
22142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  24(%[src_ptr])                  \n\t"
22152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  40(%[src_ptr])                  \n\t"
22162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  56(%[src_ptr])                  \n\t"
22172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
22182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
22202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
22212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
22222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_l2],  %[vector1b]     \n\t"
22232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
22242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
22252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -23(%[src_ptr])                 \n\t"
22272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -7(%[src_ptr])                  \n\t"
22282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   9(%[src_ptr])                   \n\t"
22292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  25(%[src_ptr])                  \n\t"
22302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  41(%[src_ptr])                  \n\t"
22312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  57(%[src_ptr])                  \n\t"
22322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
22332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp1],       $ac2,           9               \n\t"
22342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
22362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
22372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
22382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_l2],  %[vector1b]     \n\t"
22392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
22402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
22412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -22(%[src_ptr])                 \n\t"
22432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -6(%[src_ptr])                  \n\t"
22442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   10(%[src_ptr])                  \n\t"
22452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  26(%[src_ptr])                  \n\t"
22462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  42(%[src_ptr])                  \n\t"
22472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  58(%[src_ptr])                  \n\t"
22482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
22492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp2],       $ac0,           9               \n\t"
22502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
22522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
22532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
22542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_l2],  %[vector1b]     \n\t"
22552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
22562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
22572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -21(%[src_ptr])                 \n\t"
22592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -5(%[src_ptr])                  \n\t"
22602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   11(%[src_ptr])                  \n\t"
22612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  27(%[src_ptr])                  \n\t"
22622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  43(%[src_ptr])                  \n\t"
22632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  59(%[src_ptr])                  \n\t"
22642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
22652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp3],       $ac1,           9               \n\t"
22662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
22682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
22692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
22702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_l2],  %[vector1b]     \n\t"
22712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
22722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
22732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -20(%[src_ptr])                 \n\t"
22752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -4(%[src_ptr])                  \n\t"
22762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   12(%[src_ptr])                  \n\t"
22772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  28(%[src_ptr])                  \n\t"
22782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  44(%[src_ptr])                  \n\t"
22792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  60(%[src_ptr])                  \n\t"
22802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac2                            \n\t"
22812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp4],       $ac3,           9               \n\t"
22822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
22842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
22852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
22862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_l2],  %[vector1b]     \n\t"
22872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac2,           %[src_ptr_0],   %[vector2b]     \n\t"
22882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac2,           %[src_ptr_l1],  %[vector3b]     \n\t"
22892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -19(%[src_ptr])                 \n\t"
22912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -3(%[src_ptr])                  \n\t"
22922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   13(%[src_ptr])                  \n\t"
22932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  29(%[src_ptr])                  \n\t"
22942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  45(%[src_ptr])                  \n\t"
22952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  61(%[src_ptr])                  \n\t"
22962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac0                            \n\t"
22972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp5],       $ac2,           9               \n\t"
22982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
22992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
23002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
23012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
23022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_l2],  %[vector1b]     \n\t"
23032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac0,           %[src_ptr_0],   %[vector2b]     \n\t"
23042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac0,           %[src_ptr_l1],  %[vector3b]     \n\t"
23052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -18(%[src_ptr])                 \n\t"
23072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -2(%[src_ptr])                  \n\t"
23082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   14(%[src_ptr])                  \n\t"
23092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  30(%[src_ptr])                  \n\t"
23102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  46(%[src_ptr])                  \n\t"
23112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  62(%[src_ptr])                  \n\t"
23122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac1                            \n\t"
23132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp6],       $ac0,           9               \n\t"
23142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
23162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
23172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
23182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_l2],  %[vector1b]     \n\t"
23192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac1,           %[src_ptr_0],   %[vector2b]     \n\t"
23202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac1,           %[src_ptr_l1],  %[vector3b]     \n\t"
23212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l2],  -17(%[src_ptr])                 \n\t"
23232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_l1],  -1(%[src_ptr])                  \n\t"
23242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_0],   15(%[src_ptr])                  \n\t"
23252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r1],  31(%[src_ptr])                  \n\t"
23262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r2],  47(%[src_ptr])                  \n\t"
23272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "lbu            %[src_ptr_r3],  63(%[src_ptr])                  \n\t"
23282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "mtlo           %[vector4a],    $ac3                            \n\t"
23292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp7],       $ac1,           9               \n\t"
23302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l2],  %[src_ptr_r3],  8               \n\t"
23322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_0],   %[src_ptr_r1],  8               \n\t"
23332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "append         %[src_ptr_l1],  %[src_ptr_r2],  8               \n\t"
23342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_l2],  %[vector1b]     \n\t"
23352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpau.h.qbr     $ac3,           %[src_ptr_0],   %[vector2b]     \n\t"
23362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "dpsu.h.qbr     $ac3,           %[src_ptr_l1],  %[vector3b]     \n\t"
23372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "extp           %[Temp8],       $ac3,           9               \n\t"
23382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2),
23402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp3] "=&r" (Temp3), [Temp4] "=&r" (Temp4),
23412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp5] "=&r" (Temp5), [Temp6] "=&r" (Temp6),
23422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [Temp7] "=&r" (Temp7), [Temp8] "=r" (Temp8),
23432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l1] "=&r" (src_ptr_l1), [src_ptr_0] "=&r" (src_ptr_0),
23442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_r1] "=&r" (src_ptr_r1), [src_ptr_r2] "=&r" (src_ptr_r2),
23452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr_l2] "=&r" (src_ptr_l2), [src_ptr_r3] "=&r" (src_ptr_r3)
23462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [vector1b] "r" (vector1b), [vector2b] "r" (vector2b),
23472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [vector3b] "r" (vector3b), [vector4a] "r" (vector4a),
23482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_ptr] "r" (src_ptr)
23492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
23502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            src_ptr += 16;
23522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[8] = cm[Temp1];
23532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[9] = cm[Temp2];
23542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[10] = cm[Temp3];
23552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[11] = cm[Temp4];
23562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[12] = cm[Temp5];
23572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[13] = cm[Temp6];
23582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[14] = cm[Temp7];
23592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr[15] = cm[Temp8];
23602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            output_ptr += output_pitch;
23622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
23632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
23642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
23652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23671b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_sixtap_predict4x4_dspr2
23682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
23691b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT src_ptr,
23702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int   src_pixels_per_line,
23712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  xoffset,
23722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  yoffset,
23731b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT dst_ptr,
23742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int dst_pitch
23752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
23762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
23771b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char FData[9 * 4]; /* Temp data bufffer used in filtering */
23782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int pos = 16;
23792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* bit positon for extract from acc */
23812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    __asm__ __volatile__ (
23822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        "wrdsp      %[pos],     1           \n\t"
23832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        :
23842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        : [pos] "r" (pos)
23852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    );
23862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
23871b362b15af34006e6a11974088a46d42b903418eJohann    if (yoffset)
23881b362b15af34006e6a11974088a46d42b903418eJohann    {
23892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* First filter 1-D horizontally... */
23902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        vp8_filter_block2d_first_pass_4(src_ptr - (2 * src_pixels_per_line), FData,
23912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                                        src_pixels_per_line, 9, xoffset, 4);
23922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* then filter verticaly... */
23932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        vp8_filter_block2d_second_pass4(FData + 8, dst_ptr, dst_pitch, yoffset);
23942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
23952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    else
23962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */
23972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        vp8_filter_block2d_first_pass_4(src_ptr, dst_ptr, src_pixels_per_line,
23982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                                        4, xoffset, dst_pitch);
23992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
24002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24021b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_sixtap_predict8x8_dspr2
24032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
24041b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char   *RESTRICT src_ptr,
24052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  src_pixels_per_line,
24062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  xoffset,
24072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  yoffset,
24081b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT dst_ptr,
24092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  dst_pitch
24102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
24112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
24122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24131b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char FData[13 * 8]; /* Temp data bufffer used in filtering */
24141b362b15af34006e6a11974088a46d42b903418eJohann    unsigned int pos, Temp1, Temp2;
24152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    pos = 16;
24172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* bit positon for extract from acc */
24192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    __asm__ __volatile__ (
24202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        "wrdsp      %[pos],     1               \n\t"
24212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        :
24222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        : [pos] "r" (pos)
24232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    );
24242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24251b362b15af34006e6a11974088a46d42b903418eJohann    if (yoffset)
24261b362b15af34006e6a11974088a46d42b903418eJohann    {
24272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        src_ptr = src_ptr - (2 * src_pixels_per_line);
24292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        if (xoffset)
24312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* filter 1-D horizontally... */
24322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line,
24332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                                                13, xoffset, 8);
24342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24351b362b15af34006e6a11974088a46d42b903418eJohann        else
24361b362b15af34006e6a11974088a46d42b903418eJohann        {
24372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* prefetch src_ptr data to cache memory */
24381b362b15af34006e6a11974088a46d42b903418eJohann            prefetch_load(src_ptr + 2 * src_pixels_per_line);
24392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
24412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   0(%[FData])                             \n\t"
24442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   4(%[FData])                             \n\t"
24452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   8(%[FData])                             \n\t"
24502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   12(%[FData])                            \n\t"
24512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   16(%[FData])                            \n\t"
24562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   20(%[FData])                            \n\t"
24572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   24(%[FData])                            \n\t"
24622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   28(%[FData])                            \n\t"
24632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   32(%[FData])                            \n\t"
24682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   36(%[FData])                            \n\t"
24692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   40(%[FData])                            \n\t"
24742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   44(%[FData])                            \n\t"
24752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   48(%[FData])                            \n\t"
24802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   52(%[FData])                            \n\t"
24812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   56(%[FData])                            \n\t"
24862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   60(%[FData])                            \n\t"
24872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   64(%[FData])                            \n\t"
24922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   68(%[FData])                            \n\t"
24932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
24942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
24952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
24962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
24972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   72(%[FData])                            \n\t"
24982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   76(%[FData])                            \n\t"
24992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   80(%[FData])                            \n\t"
25042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   84(%[FData])                            \n\t"
25052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   88(%[FData])                            \n\t"
25102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   92(%[FData])                            \n\t"
25112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   96(%[FData])                            \n\t"
25162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   100(%[FData])                           \n\t"
25172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2)
25192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [FData] "r" (FData), [src_ptr] "r" (src_ptr),
25202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_pixels_per_line] "r" (src_pixels_per_line)
25212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
25222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
25231b362b15af34006e6a11974088a46d42b903418eJohann
25242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* filter verticaly... */
25252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 8, 8, yoffset);
25262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
25272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */
25291b362b15af34006e6a11974088a46d42b903418eJohann    else
25301b362b15af34006e6a11974088a46d42b903418eJohann    {
25312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        if (xoffset)
25322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line,
25332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                                                8, xoffset, dst_pitch);
25342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25351b362b15af34006e6a11974088a46d42b903418eJohann        else
25361b362b15af34006e6a11974088a46d42b903418eJohann        {
25372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* copy from src buffer to dst buffer */
25382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
25392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   0(%[dst_ptr])                           \n\t"
25422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   4(%[dst_ptr])                           \n\t"
25432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   8(%[dst_ptr])                           \n\t"
25482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   12(%[dst_ptr])                          \n\t"
25492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   16(%[dst_ptr])                          \n\t"
25542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   20(%[dst_ptr])                          \n\t"
25552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   24(%[dst_ptr])                          \n\t"
25602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   28(%[dst_ptr])                          \n\t"
25612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],   %[src_pixels_per_line]    \n\t"
25622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   32(%[dst_ptr])                          \n\t"
25662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   36(%[dst_ptr])                          \n\t"
25672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   40(%[dst_ptr])                          \n\t"
25722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   44(%[dst_ptr])                          \n\t"
25732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   48(%[dst_ptr])                          \n\t"
25782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   52(%[dst_ptr])                          \n\t"
25792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
25802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
25822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
25832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   56(%[dst_ptr])                          \n\t"
25842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   60(%[dst_ptr])                          \n\t"
25852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2)
25872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr),
25882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_pixels_per_line] "r" (src_pixels_per_line)
25892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
25902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
25912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
25922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
25932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
25951b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_sixtap_predict8x4_dspr2
25962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
25971b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char   *RESTRICT src_ptr,
25982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  src_pixels_per_line,
25992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  xoffset,
26002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  yoffset,
26011b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT dst_ptr,
26022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  dst_pitch
26032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
26042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
26051b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char FData[9 * 8]; /* Temp data bufffer used in filtering */
26061b362b15af34006e6a11974088a46d42b903418eJohann    unsigned int pos, Temp1, Temp2;
26072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    pos = 16;
26092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* bit positon for extract from acc */
26112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    __asm__ __volatile__ (
26122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        "wrdsp      %[pos],     1           \n\t"
26132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        :
26142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        : [pos] "r" (pos)
26152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    );
26162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26171b362b15af34006e6a11974088a46d42b903418eJohann    if (yoffset)
26181b362b15af34006e6a11974088a46d42b903418eJohann    {
26192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        src_ptr = src_ptr - (2 * src_pixels_per_line);
26212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        if (xoffset)
26232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* filter 1-D horizontally... */
26242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line,
26252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                                                9, xoffset, 8);
26262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26271b362b15af34006e6a11974088a46d42b903418eJohann        else
26281b362b15af34006e6a11974088a46d42b903418eJohann        {
26292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* prefetch src_ptr data to cache memory */
26301b362b15af34006e6a11974088a46d42b903418eJohann            prefetch_load(src_ptr + 2 * src_pixels_per_line);
26312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
26332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   0(%[FData])                             \n\t"
26362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   4(%[FData])                             \n\t"
26372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
26382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   8(%[FData])                             \n\t"
26422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   12(%[FData])                            \n\t"
26432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
26442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   16(%[FData])                            \n\t"
26482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   20(%[FData])                            \n\t"
26492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
26502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   24(%[FData])                            \n\t"
26542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   28(%[FData])                            \n\t"
26552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
26562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   32(%[FData])                            \n\t"
26602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   36(%[FData])                            \n\t"
26612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
26622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26652f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   40(%[FData])                            \n\t"
26662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   44(%[FData])                            \n\t"
26672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
26682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   48(%[FData])                            \n\t"
26722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   52(%[FData])                            \n\t"
26732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
26742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   56(%[FData])                            \n\t"
26782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   60(%[FData])                            \n\t"
26792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
26802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
26822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
26832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   64(%[FData])                            \n\t"
26842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   68(%[FData])                            \n\t"
26852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2)
26872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [FData] "r" (FData), [src_ptr] "r" (src_ptr),
26882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_pixels_per_line] "r" (src_pixels_per_line)
26892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
26902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
26911b362b15af34006e6a11974088a46d42b903418eJohann
26922f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* filter verticaly... */
26932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 4, 8, yoffset);
26942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
26952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
26962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */
26971b362b15af34006e6a11974088a46d42b903418eJohann    else
26981b362b15af34006e6a11974088a46d42b903418eJohann    {
26992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        if (xoffset)
27002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line,
27012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                                                4, xoffset, dst_pitch);
27022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27031b362b15af34006e6a11974088a46d42b903418eJohann        else
27041b362b15af34006e6a11974088a46d42b903418eJohann        {
27052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* copy from src buffer to dst buffer */
27062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            __asm__ __volatile__ (
27072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
27082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
27092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   0(%[dst_ptr])                           \n\t"
27102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   4(%[dst_ptr])                           \n\t"
27112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
27122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
27142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
27152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   8(%[dst_ptr])                           \n\t"
27162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   12(%[dst_ptr])                          \n\t"
27172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
27182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
27202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
27212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   16(%[dst_ptr])                          \n\t"
27222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   20(%[dst_ptr])                          \n\t"
27232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "addu   %[src_ptr], %[src_ptr],    %[src_pixels_per_line]   \n\t"
27242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp1],   0(%[src_ptr])                           \n\t"
27262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "ulw    %[Temp2],   4(%[src_ptr])                           \n\t"
27272f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp1],   24(%[dst_ptr])                          \n\t"
27282f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                "sw     %[Temp2],   28(%[dst_ptr])                          \n\t"
27292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27302f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [Temp1] "=&r" (Temp1), [Temp2] "=&r" (Temp2)
27312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                : [dst_ptr] "r" (dst_ptr), [src_ptr] "r" (src_ptr),
27322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan                  [src_pixels_per_line] "r" (src_pixels_per_line)
27332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            );
27342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
27352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
27362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
27372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27391b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_sixtap_predict16x16_dspr2
27402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan(
27411b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char   *RESTRICT src_ptr,
27422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  src_pixels_per_line,
27432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  xoffset,
27442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  yoffset,
27451b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *RESTRICT dst_ptr,
27462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    int  dst_pitch
27472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan)
27482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{
27492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    const unsigned short *VFilter;
27501b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char FData[21 * 16]; /* Temp data bufffer used in filtering */
27512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    unsigned int pos;
27522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    VFilter = sub_pel_filterss[yoffset];
27542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    pos = 16;
27562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    /* bit positon for extract from acc */
27582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    __asm__ __volatile__ (
27592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        "wrdsp      %[pos],     1           \n\t"
27602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        :
27612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        : [pos] "r" (pos)
27622f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    );
27632f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27641b362b15af34006e6a11974088a46d42b903418eJohann    if (yoffset)
27651b362b15af34006e6a11974088a46d42b903418eJohann    {
27662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        src_ptr = src_ptr - (2 * src_pixels_per_line);
27682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan
27691b362b15af34006e6a11974088a46d42b903418eJohann        switch (xoffset)
27701b362b15af34006e6a11974088a46d42b903418eJohann        {
27712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan            /* filter 1-D horizontally... */
27721b362b15af34006e6a11974088a46d42b903418eJohann        case 2:
27731b362b15af34006e6a11974088a46d42b903418eJohann        case 4:
27741b362b15af34006e6a11974088a46d42b903418eJohann        case 6:
27751b362b15af34006e6a11974088a46d42b903418eJohann            /* 6 tap filter */
27761b362b15af34006e6a11974088a46d42b903418eJohann            vp8_filter_block2d_first_pass16_6tap(src_ptr, FData, src_pixels_per_line,
27771b362b15af34006e6a11974088a46d42b903418eJohann                                                 21, xoffset, 16);
27781b362b15af34006e6a11974088a46d42b903418eJohann            break;
27791b362b15af34006e6a11974088a46d42b903418eJohann
27801b362b15af34006e6a11974088a46d42b903418eJohann        case 0:
27811b362b15af34006e6a11974088a46d42b903418eJohann            /* only copy buffer */
27821b362b15af34006e6a11974088a46d42b903418eJohann            vp8_filter_block2d_first_pass16_0(src_ptr, FData, src_pixels_per_line);
27831b362b15af34006e6a11974088a46d42b903418eJohann            break;
27841b362b15af34006e6a11974088a46d42b903418eJohann
27851b362b15af34006e6a11974088a46d42b903418eJohann        case 1:
27861b362b15af34006e6a11974088a46d42b903418eJohann        case 3:
27871b362b15af34006e6a11974088a46d42b903418eJohann        case 5:
27881b362b15af34006e6a11974088a46d42b903418eJohann        case 7:
27891b362b15af34006e6a11974088a46d42b903418eJohann            /* 4 tap filter */
27901b362b15af34006e6a11974088a46d42b903418eJohann            vp8_filter_block2d_first_pass16_4tap(src_ptr, FData, src_pixels_per_line, 16,
27911b362b15af34006e6a11974088a46d42b903418eJohann                                                 21, xoffset, yoffset, dst_ptr, dst_pitch);
27921b362b15af34006e6a11974088a46d42b903418eJohann            break;
27932f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
27941b362b15af34006e6a11974088a46d42b903418eJohann
27952f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* filter verticaly... */
27962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        vp8_filter_block2d_second_pass161(FData + 32, dst_ptr, dst_pitch, VFilter);
27972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
27981b362b15af34006e6a11974088a46d42b903418eJohann    else
27991b362b15af34006e6a11974088a46d42b903418eJohann    {
28002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        /* if (yoffsset == 0) vp8_filter_block2d_first_pass save data to dst_ptr */
28011b362b15af34006e6a11974088a46d42b903418eJohann        switch (xoffset)
28021b362b15af34006e6a11974088a46d42b903418eJohann        {
28031b362b15af34006e6a11974088a46d42b903418eJohann        case 2:
28041b362b15af34006e6a11974088a46d42b903418eJohann        case 4:
28051b362b15af34006e6a11974088a46d42b903418eJohann        case 6:
28061b362b15af34006e6a11974088a46d42b903418eJohann            /* 6 tap filter */
28071b362b15af34006e6a11974088a46d42b903418eJohann            vp8_filter_block2d_first_pass16_6tap(src_ptr, dst_ptr, src_pixels_per_line,
28081b362b15af34006e6a11974088a46d42b903418eJohann                                                 16, xoffset, dst_pitch);
28091b362b15af34006e6a11974088a46d42b903418eJohann            break;
28101b362b15af34006e6a11974088a46d42b903418eJohann
28111b362b15af34006e6a11974088a46d42b903418eJohann        case 1:
28121b362b15af34006e6a11974088a46d42b903418eJohann        case 3:
28131b362b15af34006e6a11974088a46d42b903418eJohann        case 5:
28141b362b15af34006e6a11974088a46d42b903418eJohann        case 7:
28151b362b15af34006e6a11974088a46d42b903418eJohann            /* 4 tap filter */
28161b362b15af34006e6a11974088a46d42b903418eJohann            vp8_filter_block2d_first_pass16_4tap(src_ptr, dst_ptr, src_pixels_per_line, 16,
28171b362b15af34006e6a11974088a46d42b903418eJohann                                                 21, xoffset, yoffset, dst_ptr, dst_pitch);
28181b362b15af34006e6a11974088a46d42b903418eJohann            break;
28192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan        }
28202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan    }
28212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan}
28221b362b15af34006e6a11974088a46d42b903418eJohann
28231b362b15af34006e6a11974088a46d42b903418eJohann#endif
2824