1233d2500723e5594f3e7c70896ffeeef32b9c950ywan/* 2233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 3233d2500723e5594f3e7c70896ffeeef32b9c950ywan * 4233d2500723e5594f3e7c70896ffeeef32b9c950ywan * Use of this source code is governed by a BSD-style license 5233d2500723e5594f3e7c70896ffeeef32b9c950ywan * that can be found in the LICENSE file in the root of the source 6233d2500723e5594f3e7c70896ffeeef32b9c950ywan * tree. An additional intellectual property rights grant can be found 7233d2500723e5594f3e7c70896ffeeef32b9c950ywan * in the file PATENTS. All contributing project authors may 8233d2500723e5594f3e7c70896ffeeef32b9c950ywan * be found in the AUTHORS file in the root of the source tree. 9233d2500723e5594f3e7c70896ffeeef32b9c950ywan */ 10233d2500723e5594f3e7c70896ffeeef32b9c950ywan 11233d2500723e5594f3e7c70896ffeeef32b9c950ywan 12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_config.h" 13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp8_rtcd.h" 14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx/vpx_integer.h" 15233d2500723e5594f3e7c70896ffeeef32b9c950ywan 16233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if HAVE_DSPR2 17233d2500723e5594f3e7c70896ffeeef32b9c950ywaninline void prefetch_load_int(unsigned char *src) 18233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 19233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 20233d2500723e5594f3e7c70896ffeeef32b9c950ywan "pref 0, 0(%[src]) \n\t" 21233d2500723e5594f3e7c70896ffeeef32b9c950ywan : 22233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src) 23233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 24233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 25233d2500723e5594f3e7c70896ffeeef32b9c950ywan 26233d2500723e5594f3e7c70896ffeeef32b9c950ywan 27233d2500723e5594f3e7c70896ffeeef32b9c950ywan__inline void vp8_copy_mem16x16_dspr2( 28233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned char *RESTRICT src, 29233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 30233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned char *RESTRICT dst, 31233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride) 32233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 33233d2500723e5594f3e7c70896ffeeef32b9c950ywan int r; 34233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int a0, a1, a2, a3; 35233d2500723e5594f3e7c70896ffeeef32b9c950ywan 36233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (r = 16; r--;) 37233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 38233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* load src data in cache memory */ 39233d2500723e5594f3e7c70896ffeeef32b9c950ywan prefetch_load_int(src + src_stride); 40233d2500723e5594f3e7c70896ffeeef32b9c950ywan 41233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* use unaligned memory load and store */ 42233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 43233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[a0], 0(%[src]) \n\t" 44233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[a1], 4(%[src]) \n\t" 45233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[a2], 8(%[src]) \n\t" 46233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[a3], 12(%[src]) \n\t" 47233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[a0], 0(%[dst]) \n\t" 48233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[a1], 4(%[dst]) \n\t" 49233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[a2], 8(%[dst]) \n\t" 50233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[a3], 12(%[dst]) \n\t" 51233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [a0] "=&r" (a0), [a1] "=&r" (a1), 52233d2500723e5594f3e7c70896ffeeef32b9c950ywan [a2] "=&r" (a2), [a3] "=&r" (a3) 53233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src), [dst] "r" (dst) 54233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 55233d2500723e5594f3e7c70896ffeeef32b9c950ywan 56233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 57233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 58233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 59233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 60233d2500723e5594f3e7c70896ffeeef32b9c950ywan 61233d2500723e5594f3e7c70896ffeeef32b9c950ywan 62233d2500723e5594f3e7c70896ffeeef32b9c950ywan__inline void vp8_copy_mem8x8_dspr2( 63233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned char *RESTRICT src, 64233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 65233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned char *RESTRICT dst, 66233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride) 67233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 68233d2500723e5594f3e7c70896ffeeef32b9c950ywan int r; 69233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int a0, a1; 70233d2500723e5594f3e7c70896ffeeef32b9c950ywan 71233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* load src data in cache memory */ 72233d2500723e5594f3e7c70896ffeeef32b9c950ywan prefetch_load_int(src + src_stride); 73233d2500723e5594f3e7c70896ffeeef32b9c950ywan 74233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (r = 8; r--;) 75233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 76233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* use unaligned memory load and store */ 77233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 78233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[a0], 0(%[src]) \n\t" 79233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[a1], 4(%[src]) \n\t" 80233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[a0], 0(%[dst]) \n\t" 81233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[a1], 4(%[dst]) \n\t" 82233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [a0] "=&r" (a0), [a1] "=&r" (a1) 83233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src), [dst] "r" (dst) 84233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 85233d2500723e5594f3e7c70896ffeeef32b9c950ywan 86233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 87233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 88233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 89233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 90233d2500723e5594f3e7c70896ffeeef32b9c950ywan 91233d2500723e5594f3e7c70896ffeeef32b9c950ywan 92233d2500723e5594f3e7c70896ffeeef32b9c950ywan__inline void vp8_copy_mem8x4_dspr2( 93233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned char *RESTRICT src, 94233d2500723e5594f3e7c70896ffeeef32b9c950ywan int src_stride, 95233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned char *RESTRICT dst, 96233d2500723e5594f3e7c70896ffeeef32b9c950ywan int dst_stride) 97233d2500723e5594f3e7c70896ffeeef32b9c950ywan{ 98233d2500723e5594f3e7c70896ffeeef32b9c950ywan int r; 99233d2500723e5594f3e7c70896ffeeef32b9c950ywan unsigned int a0, a1; 100233d2500723e5594f3e7c70896ffeeef32b9c950ywan 101233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* load src data in cache memory */ 102233d2500723e5594f3e7c70896ffeeef32b9c950ywan prefetch_load_int(src + src_stride); 103233d2500723e5594f3e7c70896ffeeef32b9c950ywan 104233d2500723e5594f3e7c70896ffeeef32b9c950ywan for (r = 4; r--;) 105233d2500723e5594f3e7c70896ffeeef32b9c950ywan { 106233d2500723e5594f3e7c70896ffeeef32b9c950ywan /* use unaligned memory load and store */ 107233d2500723e5594f3e7c70896ffeeef32b9c950ywan __asm__ __volatile__ ( 108233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[a0], 0(%[src]) \n\t" 109233d2500723e5594f3e7c70896ffeeef32b9c950ywan "ulw %[a1], 4(%[src]) \n\t" 110233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[a0], 0(%[dst]) \n\t" 111233d2500723e5594f3e7c70896ffeeef32b9c950ywan "sw %[a1], 4(%[dst]) \n\t" 112233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [a0] "=&r" (a0), [a1] "=&r" (a1) 113233d2500723e5594f3e7c70896ffeeef32b9c950ywan : [src] "r" (src), [dst] "r" (dst) 114233d2500723e5594f3e7c70896ffeeef32b9c950ywan ); 115233d2500723e5594f3e7c70896ffeeef32b9c950ywan 116233d2500723e5594f3e7c70896ffeeef32b9c950ywan src += src_stride; 117233d2500723e5594f3e7c70896ffeeef32b9c950ywan dst += dst_stride; 118233d2500723e5594f3e7c70896ffeeef32b9c950ywan } 119233d2500723e5594f3e7c70896ffeeef32b9c950ywan} 120233d2500723e5594f3e7c70896ffeeef32b9c950ywan 121233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif 122