12f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan/* 21b362b15af34006e6a11974088a46d42b903418eJohann * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 32f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * 42f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * Use of this source code is governed by a BSD-style license 52f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * that can be found in the LICENSE file in the root of the source 62f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * tree. An additional intellectual property rights grant can be found 72f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * in the file PATENTS. All contributing project authors may 82f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan * be found in the AUTHORS file in the root of the source tree. 92f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan */ 102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 121b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h" 13ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp8_rtcd.h" 141b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx/vpx_integer.h" 152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 161b362b15af34006e6a11974088a46d42b903418eJohann#if HAVE_DSPR2 171b362b15af34006e6a11974088a46d42b903418eJohanninline void prefetch_load_int(unsigned char *src) 181b362b15af34006e6a11974088a46d42b903418eJohann{ 192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 202f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "pref 0, 0(%[src]) \n\t" 212f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : 222f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src] "r" (src) 232f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 242f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 252f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 262f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 271b362b15af34006e6a11974088a46d42b903418eJohann__inline void vp8_copy_mem16x16_dspr2( 281b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src, 292f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int src_stride, 301b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst, 312f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int dst_stride) 322f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 332f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int r; 342f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int a0, a1, a2, a3; 352f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 362f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (r = 16; r--;) 372f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 382f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* load src data in cache memory */ 392f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load_int(src + src_stride); 402f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 412f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* use unaligned memory load and store */ 422f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 432f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[a0], 0(%[src]) \n\t" 442f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[a1], 4(%[src]) \n\t" 452f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[a2], 8(%[src]) \n\t" 462f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[a3], 12(%[src]) \n\t" 472f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[a0], 0(%[dst]) \n\t" 482f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[a1], 4(%[dst]) \n\t" 492f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[a2], 8(%[dst]) \n\t" 502f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[a3], 12(%[dst]) \n\t" 512f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [a0] "=&r" (a0), [a1] "=&r" (a1), 522f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan [a2] "=&r" (a2), [a3] "=&r" (a3) 532f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src] "r" (src), [dst] "r" (dst) 542f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 552f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 562f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src += src_stride; 572f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst += dst_stride; 582f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 592f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 602f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 612f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 621b362b15af34006e6a11974088a46d42b903418eJohann__inline void vp8_copy_mem8x8_dspr2( 631b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src, 642f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int src_stride, 651b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst, 662f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int dst_stride) 672f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 682f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int r; 692f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int a0, a1; 702f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 712f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* load src data in cache memory */ 722f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load_int(src + src_stride); 732f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 742f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (r = 8; r--;) 752f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 762f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* use unaligned memory load and store */ 772f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 782f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[a0], 0(%[src]) \n\t" 792f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[a1], 4(%[src]) \n\t" 802f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[a0], 0(%[dst]) \n\t" 812f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[a1], 4(%[dst]) \n\t" 822f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [a0] "=&r" (a0), [a1] "=&r" (a1) 832f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src] "r" (src), [dst] "r" (dst) 842f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 852f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 862f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src += src_stride; 872f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst += dst_stride; 882f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 892f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 902f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 912f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 921b362b15af34006e6a11974088a46d42b903418eJohann__inline void vp8_copy_mem8x4_dspr2( 931b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT src, 942f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int src_stride, 951b362b15af34006e6a11974088a46d42b903418eJohann unsigned char *RESTRICT dst, 962f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int dst_stride) 972f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan{ 982f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan int r; 992f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan unsigned int a0, a1; 1002f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1012f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* load src data in cache memory */ 1022f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan prefetch_load_int(src + src_stride); 1032f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1042f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan for (r = 4; r--;) 1052f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan { 1062f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan /* use unaligned memory load and store */ 1072f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan __asm__ __volatile__ ( 1082f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[a0], 0(%[src]) \n\t" 1092f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "ulw %[a1], 4(%[src]) \n\t" 1102f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[a0], 0(%[dst]) \n\t" 1112f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan "sw %[a1], 4(%[dst]) \n\t" 1122f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [a0] "=&r" (a0), [a1] "=&r" (a1) 1132f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan : [src] "r" (src), [dst] "r" (dst) 1142f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan ); 1152f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan 1162f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan src += src_stride; 1172f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan dst += dst_stride; 1182f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan } 1192f01f9a5c363613e7389fb28c250edcd4509f815Dragan Mrdjan} 1201b362b15af34006e6a11974088a46d42b903418eJohann 1211b362b15af34006e6a11974088a46d42b903418eJohann#endif 122