/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"

#if HAVE_DSPR2
inline void prefetch_load_int(unsigned char *src) {
  __asm__ __volatile__("pref   0,  0(%[src])   \n\t" : : [src] "r"(src));
}

__inline void vp8_copy_mem16x16_dspr2(unsigned char *RESTRICT src,
                                      int src_stride,
                                      unsigned char *RESTRICT dst,
                                      int dst_stride) {
  int r;
  unsigned int a0, a1, a2, a3;

  for (r = 16; r--;) {
    /* load src data in cache memory */
    prefetch_load_int(src + src_stride);

    /* use unaligned memory load and store */
    __asm__ __volatile__(
        "ulw    %[a0], 0(%[src])            \n\t"
        "ulw    %[a1], 4(%[src])            \n\t"
        "ulw    %[a2], 8(%[src])            \n\t"
        "ulw    %[a3], 12(%[src])           \n\t"
        "sw     %[a0], 0(%[dst])            \n\t"
        "sw     %[a1], 4(%[dst])            \n\t"
        "sw     %[a2], 8(%[dst])            \n\t"
        "sw     %[a3], 12(%[dst])           \n\t"
        : [a0] "=&r"(a0), [a1] "=&r"(a1), [a2] "=&r"(a2), [a3] "=&r"(a3)
        : [src] "r"(src), [dst] "r"(dst));

    src += src_stride;
    dst += dst_stride;
  }
}

__inline void vp8_copy_mem8x8_dspr2(unsigned char *RESTRICT src,
                                    int src_stride,
                                    unsigned char *RESTRICT dst,
                                    int dst_stride) {
  int r;
  unsigned int a0, a1;

  /* load src data in cache memory */
  prefetch_load_int(src + src_stride);

  for (r = 8; r--;) {
    /* use unaligned memory load and store */
    __asm__ __volatile__(
        "ulw    %[a0], 0(%[src])            \n\t"
        "ulw    %[a1], 4(%[src])            \n\t"
        "sw     %[a0], 0(%[dst])            \n\t"
        "sw     %[a1], 4(%[dst])            \n\t"
        : [a0] "=&r"(a0), [a1] "=&r"(a1)
        : [src] "r"(src), [dst] "r"(dst));

    src += src_stride;
    dst += dst_stride;
  }
}

__inline void vp8_copy_mem8x4_dspr2(unsigned char *RESTRICT src,
                                    int src_stride,
                                    unsigned char *RESTRICT dst,
                                    int dst_stride) {
  int r;
  unsigned int a0, a1;

  /* load src data in cache memory */
  prefetch_load_int(src + src_stride);

  for (r = 4; r--;) {
    /* use unaligned memory load and store */
    __asm__ __volatile__(
        "ulw    %[a0], 0(%[src])            \n\t"
        "ulw    %[a1], 4(%[src])            \n\t"
        "sw     %[a0], 0(%[dst])            \n\t"
        "sw     %[a1], 4(%[dst])            \n\t"
        : [a0] "=&r"(a0), [a1] "=&r"(a1)
        : [src] "r"(src), [dst] "r"(dst));

    src += src_stride;
    dst += dst_stride;
  }
}

#endif