1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "vpx_config.h"
12#include "vp8/common/variance.h"
13#include "vpx_ports/mem.h"
14
/* Declared here, defined in another translation unit. Nothing in the visible
 * part of this file calls it.
 * NOTE(review): the name and the SSE / Sum out-parameters suggest a 16x16
 * sum and sum-of-squares kernel -- confirm against the implementation.
 */
extern unsigned int vp8_get16x16var_sse2(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum);
/* Declared here, defined in another translation unit.
 *
 * vp8_sub_pixel_variance16x16_ssse3 and vp8_sub_pixel_variance16x8_ssse3 call
 * this when xoffset == 4 and yoffset == 4. They pass their own
 * src_ptr / src_pixels_per_line as ref_ptr / ref_pixels_per_line, their
 * dst_ptr / dst_pixels_per_line as src_ptr / src_pixels_per_line, and 16 or 8
 * as Height. After the call they read *sum and *sumsquared to form the
 * variance.
 * NOTE(review): presumably the combined horizontal + vertical half-pel case;
 * confirm against the implementation.
 */
extern void vp8_half_horiz_vert_variance16x_h_sse2(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared);
/* Declared here, defined in another translation unit.
 *
 * vp8_sub_pixel_variance16x16_ssse3 and vp8_sub_pixel_variance16x8_ssse3 call
 * this when xoffset == 4 and yoffset == 0. They pass their own
 * src_ptr / src_pixels_per_line as ref_ptr / ref_pixels_per_line, their
 * dst_ptr / dst_pixels_per_line as src_ptr / src_pixels_per_line, and 16 or 8
 * as Height. After the call they read *sum and *sumsquared to form the
 * variance.
 * NOTE(review): presumably the horizontal-only half-pel case; confirm
 * against the implementation.
 */
extern void vp8_half_horiz_variance16x_h_sse2(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared);
/* Declared here, defined in another translation unit.
 *
 * vp8_sub_pixel_variance16x16_ssse3 and vp8_sub_pixel_variance16x8_ssse3 call
 * this when xoffset == 0 and yoffset == 4. They pass their own
 * src_ptr / src_pixels_per_line as ref_ptr / ref_pixels_per_line, their
 * dst_ptr / dst_pixels_per_line as src_ptr / src_pixels_per_line, and 16 or 8
 * as Height. After the call they read *sum and *sumsquared to form the
 * variance.
 * NOTE(review): presumably the vertical-only half-pel case; confirm against
 * the implementation.
 */
extern void vp8_half_vert_variance16x_h_sse2(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int *sum,
    unsigned int *sumsquared);
/* Declared here, defined in another translation unit.
 *
 * This is the fallback used by vp8_sub_pixel_variance16x16_ssse3 and
 * vp8_sub_pixel_variance16x8_ssse3 for every (xoffset, yoffset) pair other
 * than (4, 0), (0, 4) and (4, 4). The callers forward xoffset and yoffset
 * unchanged. They pass their own src_ptr / src_pixels_per_line as
 * ref_ptr / ref_pixels_per_line, their dst_ptr / dst_pixels_per_line as
 * src_ptr / src_pixels_per_line, and 16 or 8 as Height. After the call they
 * read *sum and *sumsquared to form the variance.
 * NOTE(review): the name suggests a 2-D bilinear filter fused with the
 * sum / sum-of-squares accumulation; confirm against the implementation.
 */
extern void vp8_filter_block2d_bil_var_ssse3(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    int xoffset,
    int yoffset,
    int *sum,
    unsigned int *sumsquared);
66
/* Sub-pixel variance wrapper that requests 16 rows from the 16-wide kernels.
 *
 * Chooses one of four externally implemented kernels from
 * (xoffset, yoffset), lets it fill in a sum and a sum of squares, then:
 *   - stores the sum of squares in *sse;
 *   - returns  sum_of_squares - ((sum * sum) >> 8).
 *
 * src_ptr / src_pixels_per_line and dst_ptr / dst_pixels_per_line are
 * forwarded to the kernels unchanged, in that order.
 */
unsigned int vp8_sub_pixel_variance16x16_ssse3(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    /* Offset 4 is the value that selects the dedicated "half" kernels. */
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);

    int sum;
    unsigned int sum_sq;
    unsigned int scaled_sum_sq;

    /* This dispatch would be unnecessary if callers invoked the matching
     * kernel directly instead of going through this wrapper.
     */
    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               16, &sum, &sum_sq);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          16, &sum, &sum_sq);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &sum, &sum_sq);
    }
    else
    {
        /* Every other offset pair goes through the general kernel. */
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, xoffset, yoffset,
                                         &sum, &sum_sq);
    }

    *sse = sum_sq;

    /* The square is formed in unsigned arithmetic, so it wraps instead of
     * overflowing as a signed int. >> 8 divides by 256 (16 * 16).
     */
    scaled_sum_sq = ((unsigned int)sum * sum) >> 8;

    return sum_sq - scaled_sum_sq;
}
117
/* Sub-pixel variance wrapper that requests 8 rows from the 16-wide kernels.
 *
 * Chooses one of four externally implemented kernels from
 * (xoffset, yoffset), lets it fill in a sum and a sum of squares, then:
 *   - stores the sum of squares in *sse;
 *   - returns  sum_of_squares - ((sum * sum) >> 7).
 *
 * src_ptr / src_pixels_per_line and dst_ptr / dst_pixels_per_line are
 * forwarded to the kernels unchanged, in that order.
 */
unsigned int vp8_sub_pixel_variance16x8_ssse3(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    /* Offset 4 is the value that selects the dedicated "half" kernels. */
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);

    int sum;
    unsigned int sum_sq;
    unsigned int scaled_sum_sq;

    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               8, &sum, &sum_sq);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          8, &sum, &sum_sq);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &sum, &sum_sq);
    }
    else
    {
        /* Every other offset pair goes through the general kernel. */
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, xoffset, yoffset,
                                         &sum, &sum_sq);
    }

    *sse = sum_sq;

    /* The square is formed in unsigned arithmetic, so it wraps instead of
     * overflowing as a signed int. >> 7 divides by 128 (16 * 8).
     */
    scaled_sum_sq = ((unsigned int)sum * sum) >> 7;

    return sum_sq - scaled_sum_sq;
}
166