19b35249446b07f40ac5fcc3205f2c048616efacchkuang/* 29b35249446b07f40ac5fcc3205f2c048616efacchkuang * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 39b35249446b07f40ac5fcc3205f2c048616efacchkuang * 49b35249446b07f40ac5fcc3205f2c048616efacchkuang * Use of this source code is governed by a BSD-style license 59b35249446b07f40ac5fcc3205f2c048616efacchkuang * that can be found in the LICENSE file in the root of the source 69b35249446b07f40ac5fcc3205f2c048616efacchkuang * tree. An additional intellectual property rights grant can be found 79b35249446b07f40ac5fcc3205f2c048616efacchkuang * in the file PATENTS. All contributing project authors may 89b35249446b07f40ac5fcc3205f2c048616efacchkuang * be found in the AUTHORS file in the root of the source tree. 99b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 109b35249446b07f40ac5fcc3205f2c048616efacchkuang 119b35249446b07f40ac5fcc3205f2c048616efacchkuang#include <stdlib.h> 129b35249446b07f40ac5fcc3205f2c048616efacchkuang 139b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "./vp9_rtcd.h" 149b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_common.h" 159b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_loopfilter.h" 169b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/vp9_onyxc_int.h" 179b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/mips/dspr2/vp9_common_dspr2.h" 189b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/mips/dspr2/vp9_loopfilter_macros_dspr2.h" 199b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/mips/dspr2/vp9_loopfilter_masks_dspr2.h" 209b35249446b07f40ac5fcc3205f2c048616efacchkuang#include "vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h" 219b35249446b07f40ac5fcc3205f2c048616efacchkuang 229b35249446b07f40ac5fcc3205f2c048616efacchkuang#if HAVE_DSPR2 239b35249446b07f40ac5fcc3205f2c048616efacchkuangvoid vp9_loop_filter_horizontal_edge_dspr2(unsigned char *s, 249b35249446b07f40ac5fcc3205f2c048616efacchkuang int pitch, 259b35249446b07f40ac5fcc3205f2c048616efacchkuang const uint8_t *blimit, 269b35249446b07f40ac5fcc3205f2c048616efacchkuang const uint8_t *limit, 279b35249446b07f40ac5fcc3205f2c048616efacchkuang const uint8_t *thresh, 289b35249446b07f40ac5fcc3205f2c048616efacchkuang int count) { 299b35249446b07f40ac5fcc3205f2c048616efacchkuang uint8_t i; 309b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t mask; 319b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t hev; 329b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; 339b35249446b07f40ac5fcc3205f2c048616efacchkuang uint8_t *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; 349b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t thresh_vec, flimit_vec, limit_vec; 359b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t uflimit, ulimit, uthresh; 369b35249446b07f40ac5fcc3205f2c048616efacchkuang 379b35249446b07f40ac5fcc3205f2c048616efacchkuang uflimit = *blimit; 389b35249446b07f40ac5fcc3205f2c048616efacchkuang ulimit = *limit; 399b35249446b07f40ac5fcc3205f2c048616efacchkuang uthresh = *thresh; 409b35249446b07f40ac5fcc3205f2c048616efacchkuang 419b35249446b07f40ac5fcc3205f2c048616efacchkuang /* create quad-byte */ 429b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 439b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[thresh_vec], %[uthresh] \n\t" 449b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[flimit_vec], %[uflimit] \n\t" 459b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[limit_vec], %[ulimit] \n\t" 469b35249446b07f40ac5fcc3205f2c048616efacchkuang 479b35249446b07f40ac5fcc3205f2c048616efacchkuang : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), 489b35249446b07f40ac5fcc3205f2c048616efacchkuang [limit_vec] "=r" (limit_vec) 499b35249446b07f40ac5fcc3205f2c048616efacchkuang : [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit) 509b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 519b35249446b07f40ac5fcc3205f2c048616efacchkuang 529b35249446b07f40ac5fcc3205f2c048616efacchkuang /* prefetch data for store */ 539b35249446b07f40ac5fcc3205f2c048616efacchkuang vp9_prefetch_store(s); 549b35249446b07f40ac5fcc3205f2c048616efacchkuang 559b35249446b07f40ac5fcc3205f2c048616efacchkuang /* loop filter designed to work using chars so that we can make maximum use 569b35249446b07f40ac5fcc3205f2c048616efacchkuang of 8 bit simd instructions. */ 579b35249446b07f40ac5fcc3205f2c048616efacchkuang for (i = 0; i < 2; i++) { 589b35249446b07f40ac5fcc3205f2c048616efacchkuang sm1 = s - (pitch << 2); 599b35249446b07f40ac5fcc3205f2c048616efacchkuang s0 = sm1 + pitch; 609b35249446b07f40ac5fcc3205f2c048616efacchkuang s1 = s0 + pitch; 619b35249446b07f40ac5fcc3205f2c048616efacchkuang s2 = s - pitch; 629b35249446b07f40ac5fcc3205f2c048616efacchkuang s3 = s; 639b35249446b07f40ac5fcc3205f2c048616efacchkuang s4 = s + pitch; 649b35249446b07f40ac5fcc3205f2c048616efacchkuang s5 = s4 + pitch; 659b35249446b07f40ac5fcc3205f2c048616efacchkuang s6 = s5 + pitch; 669b35249446b07f40ac5fcc3205f2c048616efacchkuang 679b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 689b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p1], (%[s1]) \n\t" 699b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p2], (%[s2]) \n\t" 709b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p3], (%[s3]) \n\t" 719b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p4], (%[s4]) \n\t" 729b35249446b07f40ac5fcc3205f2c048616efacchkuang 739b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p1] "=&r" (p1), [p2] "=&r" (p2), [p3] "=&r" (p3), [p4] "=&r" (p4) 749b35249446b07f40ac5fcc3205f2c048616efacchkuang : [s1] "r" (s1), [s2] "r" (s2), [s3] "r" (s3), [s4] "r" (s4) 759b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 769b35249446b07f40ac5fcc3205f2c048616efacchkuang 779b35249446b07f40ac5fcc3205f2c048616efacchkuang /* if (p1 - p4 == 0) and (p2 - p3 == 0) 789b35249446b07f40ac5fcc3205f2c048616efacchkuang mask will be zero and filtering is not needed */ 799b35249446b07f40ac5fcc3205f2c048616efacchkuang if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { 809b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 819b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[pm1], (%[sm1]) \n\t" 829b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p0], (%[s0]) \n\t" 839b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p5], (%[s5]) \n\t" 849b35249446b07f40ac5fcc3205f2c048616efacchkuang "lw %[p6], (%[s6]) \n\t" 859b35249446b07f40ac5fcc3205f2c048616efacchkuang 869b35249446b07f40ac5fcc3205f2c048616efacchkuang : [pm1] "=&r" (pm1), [p0] "=&r" (p0), [p5] "=&r" (p5), 879b35249446b07f40ac5fcc3205f2c048616efacchkuang [p6] "=&r" (p6) 889b35249446b07f40ac5fcc3205f2c048616efacchkuang : [sm1] "r" (sm1), [s0] "r" (s0), [s5] "r" (s5), [s6] "r" (s6) 899b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 909b35249446b07f40ac5fcc3205f2c048616efacchkuang 919b35249446b07f40ac5fcc3205f2c048616efacchkuang vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, 929b35249446b07f40ac5fcc3205f2c048616efacchkuang pm1, p0, p3, p4, p5, p6, 939b35249446b07f40ac5fcc3205f2c048616efacchkuang thresh_vec, &hev, &mask); 949b35249446b07f40ac5fcc3205f2c048616efacchkuang 959b35249446b07f40ac5fcc3205f2c048616efacchkuang /* if mask == 0 do filtering is not needed */ 969b35249446b07f40ac5fcc3205f2c048616efacchkuang if (mask) { 979b35249446b07f40ac5fcc3205f2c048616efacchkuang /* filtering */ 989b35249446b07f40ac5fcc3205f2c048616efacchkuang vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); 999b35249446b07f40ac5fcc3205f2c048616efacchkuang 1009b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 1019b35249446b07f40ac5fcc3205f2c048616efacchkuang "sw %[p1], (%[s1]) \n\t" 1029b35249446b07f40ac5fcc3205f2c048616efacchkuang "sw %[p2], (%[s2]) \n\t" 1039b35249446b07f40ac5fcc3205f2c048616efacchkuang "sw %[p3], (%[s3]) \n\t" 1049b35249446b07f40ac5fcc3205f2c048616efacchkuang "sw %[p4], (%[s4]) \n\t" 1059b35249446b07f40ac5fcc3205f2c048616efacchkuang 1069b35249446b07f40ac5fcc3205f2c048616efacchkuang : 1079b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p1] "r" (p1), [p2] "r" (p2), [p3] "r" (p3), [p4] "r" (p4), 1089b35249446b07f40ac5fcc3205f2c048616efacchkuang [s1] "r" (s1), [s2] "r" (s2), [s3] "r" (s3), [s4] "r" (s4) 1099b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 1109b35249446b07f40ac5fcc3205f2c048616efacchkuang } 1119b35249446b07f40ac5fcc3205f2c048616efacchkuang } 1129b35249446b07f40ac5fcc3205f2c048616efacchkuang 1139b35249446b07f40ac5fcc3205f2c048616efacchkuang s = s + 4; 1149b35249446b07f40ac5fcc3205f2c048616efacchkuang } 1159b35249446b07f40ac5fcc3205f2c048616efacchkuang} 1169b35249446b07f40ac5fcc3205f2c048616efacchkuang 1179b35249446b07f40ac5fcc3205f2c048616efacchkuangvoid vp9_loop_filter_vertical_edge_dspr2(unsigned char *s, 1189b35249446b07f40ac5fcc3205f2c048616efacchkuang int pitch, 1199b35249446b07f40ac5fcc3205f2c048616efacchkuang const uint8_t *blimit, 1209b35249446b07f40ac5fcc3205f2c048616efacchkuang const uint8_t *limit, 1219b35249446b07f40ac5fcc3205f2c048616efacchkuang const uint8_t *thresh, 1229b35249446b07f40ac5fcc3205f2c048616efacchkuang int count) { 1239b35249446b07f40ac5fcc3205f2c048616efacchkuang uint8_t i; 1249b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t mask, hev; 1259b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; 1269b35249446b07f40ac5fcc3205f2c048616efacchkuang uint8_t *s1, *s2, *s3, *s4; 1279b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t prim1, prim2, sec3, sec4, prim3, prim4; 1289b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t thresh_vec, flimit_vec, limit_vec; 1299b35249446b07f40ac5fcc3205f2c048616efacchkuang uint32_t uflimit, ulimit, uthresh; 1309b35249446b07f40ac5fcc3205f2c048616efacchkuang 1319b35249446b07f40ac5fcc3205f2c048616efacchkuang uflimit = *blimit; 1329b35249446b07f40ac5fcc3205f2c048616efacchkuang ulimit = *limit; 1339b35249446b07f40ac5fcc3205f2c048616efacchkuang uthresh = *thresh; 1349b35249446b07f40ac5fcc3205f2c048616efacchkuang 1359b35249446b07f40ac5fcc3205f2c048616efacchkuang /* create quad-byte */ 1369b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 1379b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[thresh_vec], %[uthresh] \n\t" 1389b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[flimit_vec], %[uflimit] \n\t" 1399b35249446b07f40ac5fcc3205f2c048616efacchkuang "replv.qb %[limit_vec], %[ulimit] \n\t" 1409b35249446b07f40ac5fcc3205f2c048616efacchkuang 1419b35249446b07f40ac5fcc3205f2c048616efacchkuang : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), 1429b35249446b07f40ac5fcc3205f2c048616efacchkuang [limit_vec] "=r" (limit_vec) 1439b35249446b07f40ac5fcc3205f2c048616efacchkuang : [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit) 1449b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 1459b35249446b07f40ac5fcc3205f2c048616efacchkuang 1469b35249446b07f40ac5fcc3205f2c048616efacchkuang /* prefetch data for store */ 1479b35249446b07f40ac5fcc3205f2c048616efacchkuang vp9_prefetch_store(s + pitch); 1489b35249446b07f40ac5fcc3205f2c048616efacchkuang 1499b35249446b07f40ac5fcc3205f2c048616efacchkuang for (i = 0; i < 2; i++) { 1509b35249446b07f40ac5fcc3205f2c048616efacchkuang s1 = s; 1519b35249446b07f40ac5fcc3205f2c048616efacchkuang s2 = s + pitch; 1529b35249446b07f40ac5fcc3205f2c048616efacchkuang s3 = s2 + pitch; 1539b35249446b07f40ac5fcc3205f2c048616efacchkuang s4 = s3 + pitch; 1549b35249446b07f40ac5fcc3205f2c048616efacchkuang s = s4 + pitch; 1559b35249446b07f40ac5fcc3205f2c048616efacchkuang 1569b35249446b07f40ac5fcc3205f2c048616efacchkuang /* load quad-byte vectors 1579b35249446b07f40ac5fcc3205f2c048616efacchkuang * memory is 4 byte aligned 1589b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 1599b35249446b07f40ac5fcc3205f2c048616efacchkuang p2 = *((uint32_t *)(s1 - 4)); 1609b35249446b07f40ac5fcc3205f2c048616efacchkuang p6 = *((uint32_t *)(s1)); 1619b35249446b07f40ac5fcc3205f2c048616efacchkuang p1 = *((uint32_t *)(s2 - 4)); 1629b35249446b07f40ac5fcc3205f2c048616efacchkuang p5 = *((uint32_t *)(s2)); 1639b35249446b07f40ac5fcc3205f2c048616efacchkuang p0 = *((uint32_t *)(s3 - 4)); 1649b35249446b07f40ac5fcc3205f2c048616efacchkuang p4 = *((uint32_t *)(s3)); 1659b35249446b07f40ac5fcc3205f2c048616efacchkuang pm1 = *((uint32_t *)(s4 - 4)); 1669b35249446b07f40ac5fcc3205f2c048616efacchkuang p3 = *((uint32_t *)(s4)); 1679b35249446b07f40ac5fcc3205f2c048616efacchkuang 1689b35249446b07f40ac5fcc3205f2c048616efacchkuang /* transpose pm1, p0, p1, p2 */ 1699b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 1709b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" 1719b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" 1729b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" 1739b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" 1749b35249446b07f40ac5fcc3205f2c048616efacchkuang 1759b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" 1769b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" 1779b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" 1789b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" 1799b35249446b07f40ac5fcc3205f2c048616efacchkuang 1809b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" 1819b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" 1829b35249446b07f40ac5fcc3205f2c048616efacchkuang "append %[p1], %[sec3], 16 \n\t" 1839b35249446b07f40ac5fcc3205f2c048616efacchkuang "append %[pm1], %[sec4], 16 \n\t" 1849b35249446b07f40ac5fcc3205f2c048616efacchkuang 1859b35249446b07f40ac5fcc3205f2c048616efacchkuang : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), 1869b35249446b07f40ac5fcc3205f2c048616efacchkuang [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), 1879b35249446b07f40ac5fcc3205f2c048616efacchkuang [p2] "+r" (p2), [p1] "+r" (p1), [p0] "+r" (p0), [pm1] "+r" (pm1), 1889b35249446b07f40ac5fcc3205f2c048616efacchkuang [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) 1899b35249446b07f40ac5fcc3205f2c048616efacchkuang : 1909b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 1919b35249446b07f40ac5fcc3205f2c048616efacchkuang 1929b35249446b07f40ac5fcc3205f2c048616efacchkuang /* transpose p3, p4, p5, p6 */ 1939b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 1949b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" 1959b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" 1969b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" 1979b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" 1989b35249446b07f40ac5fcc3205f2c048616efacchkuang 1999b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" 2009b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" 2019b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" 2029b35249446b07f40ac5fcc3205f2c048616efacchkuang "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" 2039b35249446b07f40ac5fcc3205f2c048616efacchkuang 2049b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" 2059b35249446b07f40ac5fcc3205f2c048616efacchkuang "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" 2069b35249446b07f40ac5fcc3205f2c048616efacchkuang "append %[p5], %[sec3], 16 \n\t" 2079b35249446b07f40ac5fcc3205f2c048616efacchkuang "append %[p3], %[sec4], 16 \n\t" 2089b35249446b07f40ac5fcc3205f2c048616efacchkuang 2099b35249446b07f40ac5fcc3205f2c048616efacchkuang : [prim1] "=&r" (prim1), [prim2] "=&r" (prim2), 2109b35249446b07f40ac5fcc3205f2c048616efacchkuang [prim3] "=&r" (prim3), [prim4] "=&r" (prim4), 2119b35249446b07f40ac5fcc3205f2c048616efacchkuang [p6] "+r" (p6), [p5] "+r" (p5), [p4] "+r" (p4), [p3] "+r" (p3), 2129b35249446b07f40ac5fcc3205f2c048616efacchkuang [sec3] "=&r" (sec3), [sec4] "=&r" (sec4) 2139b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2149b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2159b35249446b07f40ac5fcc3205f2c048616efacchkuang 2169b35249446b07f40ac5fcc3205f2c048616efacchkuang /* if (p1 - p4 == 0) and (p2 - p3 == 0) 2179b35249446b07f40ac5fcc3205f2c048616efacchkuang * mask will be zero and filtering is not needed 2189b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 2199b35249446b07f40ac5fcc3205f2c048616efacchkuang if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { 2209b35249446b07f40ac5fcc3205f2c048616efacchkuang vp9_filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, 2219b35249446b07f40ac5fcc3205f2c048616efacchkuang p0, p3, p4, p5, p6, thresh_vec, 2229b35249446b07f40ac5fcc3205f2c048616efacchkuang &hev, &mask); 2239b35249446b07f40ac5fcc3205f2c048616efacchkuang 2249b35249446b07f40ac5fcc3205f2c048616efacchkuang /* if mask == 0 do filtering is not needed */ 2259b35249446b07f40ac5fcc3205f2c048616efacchkuang if (mask) { 2269b35249446b07f40ac5fcc3205f2c048616efacchkuang /* filtering */ 2279b35249446b07f40ac5fcc3205f2c048616efacchkuang vp9_filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); 2289b35249446b07f40ac5fcc3205f2c048616efacchkuang 2299b35249446b07f40ac5fcc3205f2c048616efacchkuang /* unpack processed 4x4 neighborhood 2309b35249446b07f40ac5fcc3205f2c048616efacchkuang * don't use transpose on output data 2319b35249446b07f40ac5fcc3205f2c048616efacchkuang * because memory isn't aligned 2329b35249446b07f40ac5fcc3205f2c048616efacchkuang */ 2339b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2349b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p4], 1(%[s4]) \n\t" 2359b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p3], 0(%[s4]) \n\t" 2369b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p2], -1(%[s4]) \n\t" 2379b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p1], -2(%[s4]) \n\t" 2389b35249446b07f40ac5fcc3205f2c048616efacchkuang 2399b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2409b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), [p1] "r" (p1), 2419b35249446b07f40ac5fcc3205f2c048616efacchkuang [s4] "r" (s4) 2429b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2439b35249446b07f40ac5fcc3205f2c048616efacchkuang 2449b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2459b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p4], %[p4], 8 \n\t" 2469b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p3], %[p3], 8 \n\t" 2479b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p2], %[p2], 8 \n\t" 2489b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p1], %[p1], 8 \n\t" 2499b35249446b07f40ac5fcc3205f2c048616efacchkuang 2509b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) 2519b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2529b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2539b35249446b07f40ac5fcc3205f2c048616efacchkuang 2549b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2559b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p4], 1(%[s3]) \n\t" 2569b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p3], 0(%[s3]) \n\t" 2579b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p2], -1(%[s3]) \n\t" 2589b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p1], -2(%[s3]) \n\t" 2599b35249446b07f40ac5fcc3205f2c048616efacchkuang 2609b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p1] "+r" (p1) 2619b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), [s3] "r" (s3) 2629b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2639b35249446b07f40ac5fcc3205f2c048616efacchkuang 2649b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2659b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p4], %[p4], 8 \n\t" 2669b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p3], %[p3], 8 \n\t" 2679b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p2], %[p2], 8 \n\t" 2689b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p1], %[p1], 8 \n\t" 2699b35249446b07f40ac5fcc3205f2c048616efacchkuang 2709b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) 2719b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2729b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2739b35249446b07f40ac5fcc3205f2c048616efacchkuang 2749b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2759b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p4], 1(%[s2]) \n\t" 2769b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p3], 0(%[s2]) \n\t" 2779b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p2], -1(%[s2]) \n\t" 2789b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p1], -2(%[s2]) \n\t" 2799b35249446b07f40ac5fcc3205f2c048616efacchkuang 2809b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2819b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), [p1] "r" (p1), 2829b35249446b07f40ac5fcc3205f2c048616efacchkuang [s2] "r" (s2) 2839b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2849b35249446b07f40ac5fcc3205f2c048616efacchkuang 2859b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2869b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p4], %[p4], 8 \n\t" 2879b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p3], %[p3], 8 \n\t" 2889b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p2], %[p2], 8 \n\t" 2899b35249446b07f40ac5fcc3205f2c048616efacchkuang "srl %[p1], %[p1], 8 \n\t" 2909b35249446b07f40ac5fcc3205f2c048616efacchkuang 2919b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p4] "+r" (p4), [p3] "+r" (p3), [p2] "+r" (p2), [p1] "+r" (p1) 2929b35249446b07f40ac5fcc3205f2c048616efacchkuang : 2939b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 2949b35249446b07f40ac5fcc3205f2c048616efacchkuang 2959b35249446b07f40ac5fcc3205f2c048616efacchkuang __asm__ __volatile__ ( 2969b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p4], 1(%[s1]) \n\t" 2979b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p3], 0(%[s1]) \n\t" 2989b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p2], -1(%[s1]) \n\t" 2999b35249446b07f40ac5fcc3205f2c048616efacchkuang "sb %[p1], -2(%[s1]) \n\t" 3009b35249446b07f40ac5fcc3205f2c048616efacchkuang 3019b35249446b07f40ac5fcc3205f2c048616efacchkuang : 3029b35249446b07f40ac5fcc3205f2c048616efacchkuang : [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2), [p1] "r" (p1), 3039b35249446b07f40ac5fcc3205f2c048616efacchkuang [s1] "r" (s1) 3049b35249446b07f40ac5fcc3205f2c048616efacchkuang ); 3059b35249446b07f40ac5fcc3205f2c048616efacchkuang } 3069b35249446b07f40ac5fcc3205f2c048616efacchkuang } 3079b35249446b07f40ac5fcc3205f2c048616efacchkuang } 3089b35249446b07f40ac5fcc3205f2c048616efacchkuang} 3099b35249446b07f40ac5fcc3205f2c048616efacchkuang#endif // #if HAVE_DSPR2 310