/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx_config.h"
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vp8_rtcd.h"
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan#include "vpx/vpx_integer.h"
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan#if HAVE_DSPR2
/* Issue a MIPS `pref` (hint 0) for the address in `src`.
 *
 * src - address handed to the prefetch instruction.
 *
 * Declared `static inline` so this helper always has a definition inside
 * this translation unit: a plain `inline` definition provides no external
 * definition under C99 rules, so a call the compiler chose not to inline
 * would fail to link.
 */
static inline void prefetch_load_int(unsigned char *src)
{
    __asm__ __volatile__ (
        "pref   0,  0(%[src])   \n\t"
        :
        : [src] "r" (src)
    );
}
25233d2500723e5594f3e7c70896ffeeef32b9c950ywan
26233d2500723e5594f3e7c70896ffeeef32b9c950ywan
27233d2500723e5594f3e7c70896ffeeef32b9c950ywan__inline void vp8_copy_mem16x16_dspr2(
28233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char *RESTRICT src,
29233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int src_stride,
30233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char *RESTRICT dst,
31233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int dst_stride)
32233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
33233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int r;
34233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int a0, a1, a2, a3;
35233d2500723e5594f3e7c70896ffeeef32b9c950ywan
36233d2500723e5594f3e7c70896ffeeef32b9c950ywan    for (r = 16; r--;)
37233d2500723e5594f3e7c70896ffeeef32b9c950ywan    {
38233d2500723e5594f3e7c70896ffeeef32b9c950ywan        /* load src data in cache memory */
39233d2500723e5594f3e7c70896ffeeef32b9c950ywan        prefetch_load_int(src + src_stride);
40233d2500723e5594f3e7c70896ffeeef32b9c950ywan
41233d2500723e5594f3e7c70896ffeeef32b9c950ywan        /* use unaligned memory load and store */
42233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
43233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "ulw    %[a0], 0(%[src])            \n\t"
44233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "ulw    %[a1], 4(%[src])            \n\t"
45233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "ulw    %[a2], 8(%[src])            \n\t"
46233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "ulw    %[a3], 12(%[src])           \n\t"
47233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sw     %[a0], 0(%[dst])            \n\t"
48233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sw     %[a1], 4(%[dst])            \n\t"
49233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sw     %[a2], 8(%[dst])            \n\t"
50233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sw     %[a3], 12(%[dst])           \n\t"
51233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [a0] "=&r" (a0), [a1] "=&r" (a1),
52233d2500723e5594f3e7c70896ffeeef32b9c950ywan              [a2] "=&r" (a2), [a3] "=&r" (a3)
53233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [src] "r" (src), [dst] "r" (dst)
54233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
55233d2500723e5594f3e7c70896ffeeef32b9c950ywan
56233d2500723e5594f3e7c70896ffeeef32b9c950ywan        src += src_stride;
57233d2500723e5594f3e7c70896ffeeef32b9c950ywan        dst += dst_stride;
58233d2500723e5594f3e7c70896ffeeef32b9c950ywan    }
59233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
60233d2500723e5594f3e7c70896ffeeef32b9c950ywan
61233d2500723e5594f3e7c70896ffeeef32b9c950ywan
62233d2500723e5594f3e7c70896ffeeef32b9c950ywan__inline void vp8_copy_mem8x8_dspr2(
63233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char *RESTRICT src,
64233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int src_stride,
65233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char *RESTRICT dst,
66233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int dst_stride)
67233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
68233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int r;
69233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int a0, a1;
70233d2500723e5594f3e7c70896ffeeef32b9c950ywan
71233d2500723e5594f3e7c70896ffeeef32b9c950ywan    /* load src data in cache memory */
72233d2500723e5594f3e7c70896ffeeef32b9c950ywan    prefetch_load_int(src + src_stride);
73233d2500723e5594f3e7c70896ffeeef32b9c950ywan
74233d2500723e5594f3e7c70896ffeeef32b9c950ywan    for (r = 8; r--;)
75233d2500723e5594f3e7c70896ffeeef32b9c950ywan    {
76233d2500723e5594f3e7c70896ffeeef32b9c950ywan        /* use unaligned memory load and store */
77233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
78233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "ulw    %[a0], 0(%[src])            \n\t"
79233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "ulw    %[a1], 4(%[src])            \n\t"
80233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sw     %[a0], 0(%[dst])            \n\t"
81233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sw     %[a1], 4(%[dst])            \n\t"
82233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [a0] "=&r" (a0), [a1] "=&r" (a1)
83233d2500723e5594f3e7c70896ffeeef32b9c950ywan            : [src] "r" (src), [dst] "r" (dst)
84233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
85233d2500723e5594f3e7c70896ffeeef32b9c950ywan
86233d2500723e5594f3e7c70896ffeeef32b9c950ywan        src += src_stride;
87233d2500723e5594f3e7c70896ffeeef32b9c950ywan        dst += dst_stride;
88233d2500723e5594f3e7c70896ffeeef32b9c950ywan    }
89233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
90233d2500723e5594f3e7c70896ffeeef32b9c950ywan
91233d2500723e5594f3e7c70896ffeeef32b9c950ywan
92233d2500723e5594f3e7c70896ffeeef32b9c950ywan__inline void vp8_copy_mem8x4_dspr2(
93233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char *RESTRICT src,
94233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int src_stride,
95233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned char *RESTRICT dst,
96233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int dst_stride)
97233d2500723e5594f3e7c70896ffeeef32b9c950ywan{
98233d2500723e5594f3e7c70896ffeeef32b9c950ywan    int r;
99233d2500723e5594f3e7c70896ffeeef32b9c950ywan    unsigned int a0, a1;
100233d2500723e5594f3e7c70896ffeeef32b9c950ywan
101233d2500723e5594f3e7c70896ffeeef32b9c950ywan    /* load src data in cache memory */
102233d2500723e5594f3e7c70896ffeeef32b9c950ywan    prefetch_load_int(src + src_stride);
103233d2500723e5594f3e7c70896ffeeef32b9c950ywan
104233d2500723e5594f3e7c70896ffeeef32b9c950ywan    for (r = 4; r--;)
105233d2500723e5594f3e7c70896ffeeef32b9c950ywan    {
106233d2500723e5594f3e7c70896ffeeef32b9c950ywan        /* use unaligned memory load and store */
107233d2500723e5594f3e7c70896ffeeef32b9c950ywan        __asm__ __volatile__ (
108233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "ulw    %[a0], 0(%[src])            \n\t"
109233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "ulw    %[a1], 4(%[src])            \n\t"
110233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sw     %[a0], 0(%[dst])            \n\t"
111233d2500723e5594f3e7c70896ffeeef32b9c950ywan            "sw     %[a1], 4(%[dst])            \n\t"
112233d2500723e5594f3e7c70896ffeeef32b9c950ywan           : [a0] "=&r" (a0), [a1] "=&r" (a1)
113233d2500723e5594f3e7c70896ffeeef32b9c950ywan           : [src] "r" (src), [dst] "r" (dst)
114233d2500723e5594f3e7c70896ffeeef32b9c950ywan        );
115233d2500723e5594f3e7c70896ffeeef32b9c950ywan
116233d2500723e5594f3e7c70896ffeeef32b9c950ywan        src += src_stride;
117233d2500723e5594f3e7c70896ffeeef32b9c950ywan        dst += dst_stride;
118233d2500723e5594f3e7c70896ffeeef32b9c950ywan    }
119233d2500723e5594f3e7c70896ffeeef32b9c950ywan}
120233d2500723e5594f3e7c70896ffeeef32b9c950ywan
121233d2500723e5594f3e7c70896ffeeef32b9c950ywan#endif
122