19b35249446b07f40ac5fcc3205f2c048616efacchkuang/* 29b35249446b07f40ac5fcc3205f2c048616efacchkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 39b35249446b07f40ac5fcc3205f2c048616efacchkuang * 49b35249446b07f40ac5fcc3205f2c048616efacchkuang * Use of this source code is governed by a BSD-style license 59b35249446b07f40ac5fcc3205f2c048616efacchkuang * that can be found in the LICENSE file in the root of the source 69b35249446b07f40ac5fcc3205f2c048616efacchkuang * tree. An additional intellectual property rights grant can be found 79b35249446b07f40ac5fcc3205f2c048616efacchkuang * in the file PATENTS. All contributing project authors may 89b35249446b07f40ac5fcc3205f2c048616efacchkuang * be found in the AUTHORS file in the root of the source tree. 99b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 109b35249446b07f40ac5fcc3205f2c048616efacchkuang 119b35249446b07f40ac5fcc3205f2c048616efacchkuang#include <stdlib.h> 129b35249446b07f40ac5fcc3205f2c048616efacchkuang 137ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "./vpx_dsp_rtcd.h" 147ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx/vpx_integer.h" 157ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx_dsp/mips/common_dspr2.h" 167ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx_dsp/mips/loopfilter_filters_dspr2.h" 177ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx_dsp/mips/loopfilter_macros_dspr2.h" 187ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx_dsp/mips/loopfilter_masks_dspr2.h" 197ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian#include "vpx_mem/vpx_mem.h" 209b35249446b07f40ac5fcc3205f2c048616efacchkuang 219b35249446b07f40ac5fcc3205f2c048616efacchkuang#if HAVE_DSPR2 227bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_lpf_horizontal_4_dspr2(unsigned char *s, int pitch, 237bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8_t *blimit, const uint8_t *limit, 2468e1c830ade592be74773e249bf94e2bbfb50de7Johann const uint8_t *thresh) { 257bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t i; 267bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t mask; 277bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t hev; 287bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; 297bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; 307bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t thresh_vec, flimit_vec, limit_vec; 317bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t uflimit, ulimit, uthresh; 329b35249446b07f40ac5fcc3205f2c048616efacchkuang 339b35249446b07f40ac5fcc3205f2c048616efacchkuang uflimit = *blimit; 349b35249446b07f40ac5fcc3205f2c048616efacchkuang ulimit = *limit; 359b35249446b07f40ac5fcc3205f2c048616efacchkuang uthresh = *thresh; 369b35249446b07f40ac5fcc3205f2c048616efacchkuang 379b35249446b07f40ac5fcc3205f2c048616efacchkuang /* create quad-byte */ 387bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 399b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[thresh_vec], %[uthresh] \n\t" 409b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[flimit_vec], %[uflimit] \n\t" 419b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[limit_vec], %[ulimit] \n\t" 429b35249446b07f40ac5fcc3205f2c048616efacchkuang 437bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), 447bc9febe8749e98a3812a0dc4380ceae75c29450Johann [limit_vec] "=r"(limit_vec) 457bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit)); 469b35249446b07f40ac5fcc3205f2c048616efacchkuang 479b35249446b07f40ac5fcc3205f2c048616efacchkuang /* prefetch data for store */ 487ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian prefetch_store(s); 499b35249446b07f40ac5fcc3205f2c048616efacchkuang 509b35249446b07f40ac5fcc3205f2c048616efacchkuang /* loop filter designed to work using chars so that we can make maximum use 519b35249446b07f40ac5fcc3205f2c048616efacchkuang of 8 bit simd instructions. */ 529b35249446b07f40ac5fcc3205f2c048616efacchkuang for (i = 0; i < 2; i++) { 539b35249446b07f40ac5fcc3205f2c048616efacchkuang sm1 = s - (pitch << 2); 549b35249446b07f40ac5fcc3205f2c048616efacchkuang s0 = sm1 + pitch; 559b35249446b07f40ac5fcc3205f2c048616efacchkuang s1 = s0 + pitch; 569b35249446b07f40ac5fcc3205f2c048616efacchkuang s2 = s - pitch; 579b35249446b07f40ac5fcc3205f2c048616efacchkuang s3 = s; 589b35249446b07f40ac5fcc3205f2c048616efacchkuang s4 = s + pitch; 599b35249446b07f40ac5fcc3205f2c048616efacchkuang s5 = s4 + pitch; 609b35249446b07f40ac5fcc3205f2c048616efacchkuang s6 = s5 + pitch; 619b35249446b07f40ac5fcc3205f2c048616efacchkuang 627bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 639b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p1], (%[s1]) \n\t" 649b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p2], (%[s2]) \n\t" 659b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p3], (%[s3]) \n\t" 669b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p4], (%[s4]) \n\t" 679b35249446b07f40ac5fcc3205f2c048616efacchkuang 687bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4) 697bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)); 709b35249446b07f40ac5fcc3205f2c048616efacchkuang 719b35249446b07f40ac5fcc3205f2c048616efacchkuang /* if (p1 - p4 == 0) and (p2 - p3 == 0) 729b35249446b07f40ac5fcc3205f2c048616efacchkuang mask will be zero and filtering is not needed */ 739b35249446b07f40ac5fcc3205f2c048616efacchkuang if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { 747bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 759b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[pm1], (%[sm1]) \n\t" 769b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p0], (%[s0]) \n\t" 779b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p5], (%[s5]) \n\t" 789b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p6], (%[s6]) \n\t" 799b35249446b07f40ac5fcc3205f2c048616efacchkuang 807bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [pm1] "=&r"(pm1), [p0] "=&r"(p0), [p5] "=&r"(p5), [p6] "=&r"(p6) 817bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [sm1] "r"(sm1), [s0] "r"(s0), [s5] "r"(s5), [s6] "r"(s6)); 829b35249446b07f40ac5fcc3205f2c048616efacchkuang 837bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5, 847bc9febe8749e98a3812a0dc4380ceae75c29450Johann p6, thresh_vec, &hev, &mask); 859b35249446b07f40ac5fcc3205f2c048616efacchkuang 869b35249446b07f40ac5fcc3205f2c048616efacchkuang /* if mask == 0 do filtering is not needed */ 879b35249446b07f40ac5fcc3205f2c048616efacchkuang if (mask) { 889b35249446b07f40ac5fcc3205f2c048616efacchkuang /* filtering */ 897ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); 909b35249446b07f40ac5fcc3205f2c048616efacchkuang 917bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 929b35249446b07f40ac5fcc3205f2c048616efacchkuang "sw %[p1], (%[s1]) \n\t" 939b35249446b07f40ac5fcc3205f2c048616efacchkuang "sw %[p2], (%[s2]) \n\t" 949b35249446b07f40ac5fcc3205f2c048616efacchkuang "sw %[p3], (%[s3]) \n\t" 959b35249446b07f40ac5fcc3205f2c048616efacchkuang "sw %[p4], (%[s4]) \n\t" 969b35249446b07f40ac5fcc3205f2c048616efacchkuang 979b35249446b07f40ac5fcc3205f2c048616efacchkuang : 987bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p1] "r"(p1), [p2] "r"(p2), [p3] "r"(p3), [p4] "r"(p4), 997bc9febe8749e98a3812a0dc4380ceae75c29450Johann [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)); 1009b35249446b07f40ac5fcc3205f2c048616efacchkuang } 1019b35249446b07f40ac5fcc3205f2c048616efacchkuang } 1029b35249446b07f40ac5fcc3205f2c048616efacchkuang 1039b35249446b07f40ac5fcc3205f2c048616efacchkuang s = s + 4; 1049b35249446b07f40ac5fcc3205f2c048616efacchkuang } 1059b35249446b07f40ac5fcc3205f2c048616efacchkuang} 1069b35249446b07f40ac5fcc3205f2c048616efacchkuang 1077bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_lpf_vertical_4_dspr2(unsigned char *s, int pitch, 1087bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8_t *blimit, const uint8_t *limit, 10968e1c830ade592be74773e249bf94e2bbfb50de7Johann const uint8_t *thresh) { 1107bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t i; 1117bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t mask, hev; 1127bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; 1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *s1, *s2, *s3, *s4; 1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t prim1, prim2, sec3, sec4, prim3, prim4; 1157bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t thresh_vec, flimit_vec, limit_vec; 1167bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint32_t uflimit, ulimit, uthresh; 1179b35249446b07f40ac5fcc3205f2c048616efacchkuang 1189b35249446b07f40ac5fcc3205f2c048616efacchkuang uflimit = *blimit; 1199b35249446b07f40ac5fcc3205f2c048616efacchkuang ulimit = *limit; 1209b35249446b07f40ac5fcc3205f2c048616efacchkuang uthresh = *thresh; 1219b35249446b07f40ac5fcc3205f2c048616efacchkuang 1229b35249446b07f40ac5fcc3205f2c048616efacchkuang /* create quad-byte */ 1237bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 1249b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[thresh_vec], %[uthresh] \n\t" 1259b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[flimit_vec], %[uflimit] \n\t" 1269b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[limit_vec], %[ulimit] \n\t" 1279b35249446b07f40ac5fcc3205f2c048616efacchkuang 1287bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), 1297bc9febe8749e98a3812a0dc4380ceae75c29450Johann [limit_vec] "=r"(limit_vec) 1307bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit)); 1319b35249446b07f40ac5fcc3205f2c048616efacchkuang 1329b35249446b07f40ac5fcc3205f2c048616efacchkuang /* prefetch data for store */ 1337ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian prefetch_store(s + pitch); 1349b35249446b07f40ac5fcc3205f2c048616efacchkuang 1359b35249446b07f40ac5fcc3205f2c048616efacchkuang for (i = 0; i < 2; i++) { 1369b35249446b07f40ac5fcc3205f2c048616efacchkuang s1 = s; 1379b35249446b07f40ac5fcc3205f2c048616efacchkuang s2 = s + pitch; 1389b35249446b07f40ac5fcc3205f2c048616efacchkuang s3 = s2 + pitch; 1399b35249446b07f40ac5fcc3205f2c048616efacchkuang s4 = s3 + pitch; 1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann s = s4 + pitch; 1419b35249446b07f40ac5fcc3205f2c048616efacchkuang 1429b35249446b07f40ac5fcc3205f2c048616efacchkuang /* load quad-byte vectors 1439b35249446b07f40ac5fcc3205f2c048616efacchkuang * memory is 4 byte aligned 1449b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 1457bc9febe8749e98a3812a0dc4380ceae75c29450Johann p2 = *((uint32_t *)(s1 - 4)); 1467bc9febe8749e98a3812a0dc4380ceae75c29450Johann p6 = *((uint32_t *)(s1)); 1477bc9febe8749e98a3812a0dc4380ceae75c29450Johann p1 = *((uint32_t *)(s2 - 4)); 1487bc9febe8749e98a3812a0dc4380ceae75c29450Johann p5 = *((uint32_t *)(s2)); 1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann p0 = *((uint32_t *)(s3 - 4)); 1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann p4 = *((uint32_t *)(s3)); 1519b35249446b07f40ac5fcc3205f2c048616efacchkuang pm1 = *((uint32_t *)(s4 - 4)); 1527bc9febe8749e98a3812a0dc4380ceae75c29450Johann p3 = *((uint32_t *)(s4)); 1539b35249446b07f40ac5fcc3205f2c048616efacchkuang 1549b35249446b07f40ac5fcc3205f2c048616efacchkuang /* transpose pm1, p0, p1, p2 */ 1557bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 1569b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" 1579b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" 1589b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" 1599b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" 1609b35249446b07f40ac5fcc3205f2c048616efacchkuang 1619b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" 1629b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" 1639b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" 1649b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" 1659b35249446b07f40ac5fcc3205f2c048616efacchkuang 1669b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" 1679b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" 1689b35249446b07f40ac5fcc3205f2c048616efacchkuang "append %[p1], %[sec3], 16 \n\t" 1699b35249446b07f40ac5fcc3205f2c048616efacchkuang "append %[pm1], %[sec4], 16 \n\t" 1709b35249446b07f40ac5fcc3205f2c048616efacchkuang 1717bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), 1727bc9febe8749e98a3812a0dc4380ceae75c29450Johann [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), 1737bc9febe8749e98a3812a0dc4380ceae75c29450Johann [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) 1747bc9febe8749e98a3812a0dc4380ceae75c29450Johann :); 1759b35249446b07f40ac5fcc3205f2c048616efacchkuang 1769b35249446b07f40ac5fcc3205f2c048616efacchkuang /* transpose p3, p4, p5, p6 */ 1777bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 1789b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" 1799b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" 1809b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" 1819b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" 1829b35249446b07f40ac5fcc3205f2c048616efacchkuang 1839b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" 1849b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" 1859b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" 1869b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" 1879b35249446b07f40ac5fcc3205f2c048616efacchkuang 1889b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" 1899b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" 1909b35249446b07f40ac5fcc3205f2c048616efacchkuang "append %[p5], %[sec3], 16 \n\t" 1919b35249446b07f40ac5fcc3205f2c048616efacchkuang "append %[p3], %[sec4], 16 \n\t" 1929b35249446b07f40ac5fcc3205f2c048616efacchkuang 1937bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), 1947bc9febe8749e98a3812a0dc4380ceae75c29450Johann [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), 1957bc9febe8749e98a3812a0dc4380ceae75c29450Johann [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) 1967bc9febe8749e98a3812a0dc4380ceae75c29450Johann :); 1979b35249446b07f40ac5fcc3205f2c048616efacchkuang 1989b35249446b07f40ac5fcc3205f2c048616efacchkuang /* if (p1 - p4 == 0) and (p2 - p3 == 0) 1999b35249446b07f40ac5fcc3205f2c048616efacchkuang * mask will be zero and filtering is not needed 2009b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 2019b35249446b07f40ac5fcc3205f2c048616efacchkuang if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { 2027bc9febe8749e98a3812a0dc4380ceae75c29450Johann filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5, 2037bc9febe8749e98a3812a0dc4380ceae75c29450Johann p6, thresh_vec, &hev, &mask); 2049b35249446b07f40ac5fcc3205f2c048616efacchkuang 2059b35249446b07f40ac5fcc3205f2c048616efacchkuang /* if mask == 0 do filtering is not needed */ 2069b35249446b07f40ac5fcc3205f2c048616efacchkuang if (mask) { 2079b35249446b07f40ac5fcc3205f2c048616efacchkuang /* filtering */ 2087ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); 2099b35249446b07f40ac5fcc3205f2c048616efacchkuang 2109b35249446b07f40ac5fcc3205f2c048616efacchkuang /* unpack processed 4x4 neighborhood 2119b35249446b07f40ac5fcc3205f2c048616efacchkuang * don't use transpose on output data 2129b35249446b07f40ac5fcc3205f2c048616efacchkuang * because memory isn't aligned 2139b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 2147bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2159b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p4], 1(%[s4]) \n\t" 2169b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p3], 0(%[s4]) \n\t" 2179b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p2], -1(%[s4]) \n\t" 2189b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p1], -2(%[s4]) \n\t" 2199b35249446b07f40ac5fcc3205f2c048616efacchkuang 2209b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2217bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), 2227bc9febe8749e98a3812a0dc4380ceae75c29450Johann [s4] "r"(s4)); 2239b35249446b07f40ac5fcc3205f2c048616efacchkuang 2247bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2259b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p4], %[p4], 8 \n\t" 2269b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p3], %[p3], 8 \n\t" 2279b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p2], %[p2], 8 \n\t" 2289b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p1], %[p1], 8 \n\t" 2299b35249446b07f40ac5fcc3205f2c048616efacchkuang 2307bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) 2317bc9febe8749e98a3812a0dc4380ceae75c29450Johann :); 2329b35249446b07f40ac5fcc3205f2c048616efacchkuang 2337bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2349b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p4], 1(%[s3]) \n\t" 2359b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p3], 0(%[s3]) \n\t" 2369b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p2], -1(%[s3]) \n\t" 2379b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p1], -2(%[s3]) \n\t" 2389b35249446b07f40ac5fcc3205f2c048616efacchkuang 2397bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p1] "+r"(p1) 2407bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [s3] "r"(s3)); 2419b35249446b07f40ac5fcc3205f2c048616efacchkuang 2427bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2439b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p4], %[p4], 8 \n\t" 2449b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p3], %[p3], 8 \n\t" 2459b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p2], %[p2], 8 \n\t" 2469b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p1], %[p1], 8 \n\t" 2479b35249446b07f40ac5fcc3205f2c048616efacchkuang 2487bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) 2497bc9febe8749e98a3812a0dc4380ceae75c29450Johann :); 2509b35249446b07f40ac5fcc3205f2c048616efacchkuang 2517bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2529b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p4], 1(%[s2]) \n\t" 2539b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p3], 0(%[s2]) \n\t" 2549b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p2], -1(%[s2]) \n\t" 2559b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p1], -2(%[s2]) \n\t" 2569b35249446b07f40ac5fcc3205f2c048616efacchkuang 2579b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2587bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), 2597bc9febe8749e98a3812a0dc4380ceae75c29450Johann [s2] "r"(s2)); 2609b35249446b07f40ac5fcc3205f2c048616efacchkuang 2617bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2629b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p4], %[p4], 8 \n\t" 2639b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p3], %[p3], 8 \n\t" 2649b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p2], %[p2], 8 \n\t" 2659b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p1], %[p1], 8 \n\t" 2669b35249446b07f40ac5fcc3205f2c048616efacchkuang 2677bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) 2687bc9febe8749e98a3812a0dc4380ceae75c29450Johann :); 2699b35249446b07f40ac5fcc3205f2c048616efacchkuang 2707bc9febe8749e98a3812a0dc4380ceae75c29450Johann __asm__ __volatile__( 2719b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p4], 1(%[s1]) \n\t" 2729b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p3], 0(%[s1]) \n\t" 2739b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p2], -1(%[s1]) \n\t" 2749b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p1], -2(%[s1]) \n\t" 2759b35249446b07f40ac5fcc3205f2c048616efacchkuang 2769b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2777bc9febe8749e98a3812a0dc4380ceae75c29450Johann : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), 2787bc9febe8749e98a3812a0dc4380ceae75c29450Johann [s1] "r"(s1)); 2799b35249446b07f40ac5fcc3205f2c048616efacchkuang } 2809b35249446b07f40ac5fcc3205f2c048616efacchkuang } 2819b35249446b07f40ac5fcc3205f2c048616efacchkuang } 2829b35249446b07f40ac5fcc3205f2c048616efacchkuang} 283b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 2847bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_lpf_horizontal_4_dual_dspr2( 2857bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *s, int p /* pitch */, const uint8_t *blimit0, 2867bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, 2877bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8_t *limit1, const uint8_t *thresh1) { 28868e1c830ade592be74773e249bf94e2bbfb50de7Johann vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0); 28968e1c830ade592be74773e249bf94e2bbfb50de7Johann vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1); 290b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian} 291b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 2927bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_lpf_horizontal_8_dual_dspr2( 2937bc9febe8749e98a3812a0dc4380ceae75c29450Johann uint8_t *s, int p /* pitch */, const uint8_t *blimit0, 2947bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, 2957bc9febe8749e98a3812a0dc4380ceae75c29450Johann const uint8_t *limit1, const uint8_t *thresh1) { 29668e1c830ade592be74773e249bf94e2bbfb50de7Johann vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0); 29768e1c830ade592be74773e249bf94e2bbfb50de7Johann vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1); 298b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian} 299b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3007bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0, 301b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *limit0, 302b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *thresh0, 303b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *blimit1, 304b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *limit1, 305b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *thresh1) { 30668e1c830ade592be74773e249bf94e2bbfb50de7Johann vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0); 30768e1c830ade592be74773e249bf94e2bbfb50de7Johann vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); 308b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian} 309b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3107bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0, 311b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *limit0, 312b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *thresh0, 313b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *blimit1, 314b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *limit1, 315b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *thresh1) { 31668e1c830ade592be74773e249bf94e2bbfb50de7Johann vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0); 31768e1c830ade592be74773e249bf94e2bbfb50de7Johann vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); 318b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian} 319b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3207bc9febe8749e98a3812a0dc4380ceae75c29450Johannvoid vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit, 321b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *limit, 322b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian const uint8_t *thresh) { 3237ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vpx_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh); 3247ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vpx_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh); 325b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian} 3269b35249446b07f40ac5fcc3205f2c048616efacchkuang#endif // #if HAVE_DSPR2 327