1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "vpx_config.h"
12#include "vp8/common/variance.h"
13#include "vp8/common/pragmas.h"
14#include "vpx_ports/mem.h"
15
/* Prototype for a routine defined outside this file.
 *
 * It is declared here but not called anywhere in the visible part of this
 * file.
 *
 * NOTE(review): the parameter names suggest it compares a 16x16 source block
 * against a reference block and reports results through *SSE and *Sum;
 * confirm against the implementation before relying on this.
 */
extern unsigned int vp8_get16x16var_sse2
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);
/* Prototype for a routine defined outside this file.
 *
 * The sub-pixel variance wrappers in this file call it when both xoffset and
 * yoffset equal 4, passing a Height of 16 or 8.  After the call the wrappers
 * read *sum and *sumsquared and combine them into the returned variance.
 *
 * NOTE(review): the wrappers pass their src_ptr/src_pixels_per_line as the
 * first pointer/stride pair (named ref_ptr here) and their dst_ptr as the
 * second (named src_ptr here); confirm the naming against the implementation.
 */
extern void vp8_half_horiz_vert_variance16x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
/* Prototype for a routine defined outside this file.
 *
 * The sub-pixel variance wrappers in this file call it when xoffset is 4 and
 * yoffset is 0, passing a Height of 16 or 8.  After the call the wrappers
 * read *sum and *sumsquared and combine them into the returned variance.
 *
 * NOTE(review): the wrappers pass their src_ptr/src_pixels_per_line as the
 * first pointer/stride pair (named ref_ptr here) and their dst_ptr as the
 * second (named src_ptr here); confirm the naming against the implementation.
 */
extern void vp8_half_horiz_variance16x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
/* Prototype for a routine defined outside this file.
 *
 * The sub-pixel variance wrappers in this file call it when xoffset is 0 and
 * yoffset is 4, passing a Height of 16 or 8.  After the call the wrappers
 * read *sum and *sumsquared and combine them into the returned variance.
 *
 * NOTE(review): the wrappers pass their src_ptr/src_pixels_per_line as the
 * first pointer/stride pair (named ref_ptr here) and their dst_ptr as the
 * second (named src_ptr here); confirm the naming against the implementation.
 */
extern void vp8_half_vert_variance16x_h_sse2
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared
);
/* Prototype for a routine defined outside this file.
 *
 * The sub-pixel variance wrappers in this file use it as the general
 * fallback: it is called for every (xoffset, yoffset) pair that is not
 * (4, 0), (0, 4) or (4, 4), with the offsets forwarded unchanged and a
 * Height of 16 or 8.  After the call the wrappers read *sum and *sumsquared
 * and combine them into the returned variance.
 *
 * NOTE(review): the wrappers pass their src_ptr/src_pixels_per_line as the
 * first pointer/stride pair (named ref_ptr here) and their dst_ptr as the
 * second (named src_ptr here); confirm the naming against the implementation.
 * NOTE(review): "bil" presumably stands for bilinear filtering; confirm.
 */
extern void vp8_filter_block2d_bil_var_ssse3
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int  xoffset,
    int  yoffset,
    int *sum,
    unsigned int *sumsquared
);
67
/* Sub-pixel variance for a 16-wide, 16-row block.
 *
 * Picks one of four external kernels based on (xoffset, yoffset), lets it
 * produce a signed sum and an unsigned sum of squares, stores the sum of
 * squares through *sse and returns
 *
 *     sum_of_squares - ((sum * sum) >> 8)
 *
 * where the shift by 8 is a division by 256 (16 * 16).
 *
 * Kernel selection:
 *   (4, 4)          -> vp8_half_horiz_vert_variance16x_h_sse2
 *   (4, 0)          -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4)          -> vp8_half_vert_variance16x_h_sse2
 *   everything else -> vp8_filter_block2d_bil_var_ssse3 (offsets forwarded)
 *
 * The dispatch could be dropped entirely if callers invoked the matching
 * kernel themselves.
 */
unsigned int vp8_sub_pixel_variance16x16_ssse3
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int sum;
    unsigned int sum_sq;
    unsigned int mean_term;
    const int x_is_four = (xoffset == 4);
    const int y_is_four = (yoffset == 4);

    /* The three special cases are mutually exclusive, so the order in which
     * they are tested does not matter.
     */
    if (x_is_four && y_is_four)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               16, &sum, &sum_sq);
    }
    else if (x_is_four && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          16, &sum, &sum_sq);
    }
    else if (xoffset == 0 && y_is_four)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &sum, &sum_sq);
    }
    else
    {
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, xoffset, yoffset,
                                         &sum, &sum_sq);
    }

    /* Square the sum in unsigned arithmetic so the product cannot trigger
     * signed-overflow undefined behaviour.
     */
    mean_term = ((unsigned int)sum * sum) >> 8;

    *sse = sum_sq;
    return sum_sq - mean_term;
}
118
/* Sub-pixel variance for a 16-wide, 8-row block.
 *
 * Picks one of four external kernels based on (xoffset, yoffset), lets it
 * produce a signed sum and an unsigned sum of squares, stores the sum of
 * squares through *sse and returns
 *
 *     sum_of_squares - ((sum * sum) >> 7)
 *
 * where the shift by 7 is a division by 128 (16 * 8).
 *
 * Kernel selection:
 *   (4, 4)          -> vp8_half_horiz_vert_variance16x_h_sse2
 *   (4, 0)          -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4)          -> vp8_half_vert_variance16x_h_sse2
 *   everything else -> vp8_filter_block2d_bil_var_ssse3 (offsets forwarded)
 */
unsigned int vp8_sub_pixel_variance16x8_ssse3
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse

)
{
    int sum;
    unsigned int sum_sq;
    unsigned int mean_term;
    const int x_is_four = (xoffset == 4);
    const int y_is_four = (yoffset == 4);

    /* The three special cases are mutually exclusive, so the order in which
     * they are tested does not matter.
     */
    if (x_is_four && y_is_four)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               8, &sum, &sum_sq);
    }
    else if (x_is_four && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          8, &sum, &sum_sq);
    }
    else if (xoffset == 0 && y_is_four)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &sum, &sum_sq);
    }
    else
    {
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, xoffset, yoffset,
                                         &sum, &sum_sq);
    }

    /* Square the sum in unsigned arithmetic so the product cannot trigger
     * signed-overflow undefined behaviour.
     */
    mean_term = ((unsigned int)sum * sum) >> 7;

    *sse = sum_sq;
    return sum_sq - mean_term;
}
167