1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include "vp8/encoder/variance.h"
13#include "vp8/common/pragmas.h"
14#include "vpx_ports/mem.h"
15
/*
 * Externally defined routine; no body is present in this file (presumably
 * the SSE2 assembly 16x16 variance kernel -- confirm against the asm
 * sources).  Going by the parameter names, results are reported through
 * SSE and Sum.  Nothing in the visible part of this file calls it.
 */
extern unsigned int vp8_get16x16var_sse2(const unsigned char *src_ptr,
                                         int source_stride,
                                         const unsigned char *ref_ptr,
                                         int recon_stride,
                                         unsigned int *SSE,
                                         int *Sum);
/*
 * Externally defined 16-wide kernel; no body is present in this file (the
 * name suggests SSE2 assembly for the combined horizontal + vertical
 * half-pixel case -- confirm against the asm sources).
 * The callers in this file pick it when xoffset == 4 and yoffset == 4, pass
 * the number of rows (16 or 8) as Height, and read the results back through
 * sum and sumsquared.
 */
extern void vp8_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                                   int ref_pixels_per_line,
                                                   const unsigned char *src_ptr,
                                                   int src_pixels_per_line,
                                                   unsigned int Height,
                                                   int *sum,
                                                   unsigned int *sumsquared);
/*
 * Externally defined 16-wide kernel; no body is present in this file (the
 * name suggests SSE2 assembly for the horizontal half-pixel case -- confirm
 * against the asm sources).
 * The callers in this file pick it when xoffset == 4 and yoffset == 0, pass
 * the number of rows (16 or 8) as Height, and read the results back through
 * sum and sumsquared.
 */
extern void vp8_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              unsigned int Height,
                                              int *sum,
                                              unsigned int *sumsquared);
/*
 * Externally defined 16-wide kernel; no body is present in this file (the
 * name suggests SSE2 assembly for the vertical half-pixel case -- confirm
 * against the asm sources).
 * The callers in this file pick it when xoffset == 0 and yoffset == 4, pass
 * the number of rows (16 or 8) as Height, and read the results back through
 * sum and sumsquared.
 */
extern void vp8_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                             int ref_pixels_per_line,
                                             const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             unsigned int Height,
                                             int *sum,
                                             unsigned int *sumsquared);
/*
 * Externally defined kernel; no body is present in this file (the name
 * suggests an SSSE3 assembly bilinear-filter + variance routine -- confirm
 * against the asm sources).
 * The callers in this file use it as the fallback for every
 * (xoffset, yoffset) pair that is not (4, 0), (0, 4) or (4, 4).  They pass
 * the number of rows (16 or 8) as Height, forward both offsets unchanged,
 * and read the results back through sum and sumsquared.
 */
extern void vp8_filter_block2d_bil_var_ssse3(const unsigned char *ref_ptr,
                                             int ref_pixels_per_line,
                                             const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             unsigned int Height,
                                             int xoffset,
                                             int yoffset,
                                             int *sum,
                                             unsigned int *sumsquared);
67
/*
 * Sub-pixel variance for a 16x16 block.
 *
 * Dispatches on (xoffset, yoffset) to one of four externally defined
 * kernels.  Each kernel is asked to process 16 rows and hands back a sum
 * and a sum of squares:
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_ssse3 (also receives both offsets)
 * An offset of 4 presumably denotes the half-pixel position -- confirm
 * against the kernels.
 *
 * src_ptr, src_pixels_per_line: first buffer and its stride; forwarded to
 *                               the kernels as their ref_ptr arguments.
 * xoffset, yoffset:             sub-pixel offsets used for the dispatch.
 * dst_ptr, dst_pixels_per_line: second buffer and its stride; forwarded to
 *                               the kernels as their src_ptr arguments.
 * sse:                          receives the sum of squares reported by
 *                               the selected kernel.
 *
 * Returns sum_of_squares - (sum * sum) / 256.
 */
unsigned int vp8_sub_pixel_variance16x16_ssse3
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0;
    unsigned int xxsum0;

    /* These tests could be avoided if the caller invoked the appropriate
     * kernel directly. */
    if (xoffset == 4 && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &xsum0, &xxsum0);
    }
    else if (xoffset == 0 && yoffset == 4)
    {
        vp8_half_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &xsum0, &xxsum0);
    }
    else if (xoffset == 4 && yoffset == 4)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            &xsum0, &xxsum0);
    }
    else
    {
        vp8_filter_block2d_bil_var_ssse3(
            src_ptr, src_pixels_per_line,
            dst_ptr, dst_pixels_per_line, 16,
            xoffset, yoffset,
            &xsum0, &xxsum0);
    }

    *sse = xxsum0;

    /*
     * Square the sum in unsigned arithmetic.  Assuming 8-bit samples over
     * 16x16 = 256 pixels, |xsum0| can reach 256 * 255 = 65280, and
     * 65280^2 = 4261478400 does not fit in a signed int, so a signed
     * multiply would overflow (undefined behaviour).  The true square is
     * still below 2^32, and squaring modulo 2^32 gives the same value for
     * xsum0 and -xsum0, so the unsigned product is exact.
     * The shift by 8 divides by 256.
     */
    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}
117
/*
 * Sub-pixel variance for a 16x8 block.
 *
 * Chooses one of four externally defined kernels from (xoffset, yoffset),
 * asks it to process 8 rows, stores the reported sum of squares in *sse and
 * returns sum_of_squares - ((sum * sum) >> 7), i.e. the squared sum divided
 * by 128.
 *
 * src_ptr / src_pixels_per_line are forwarded as the kernels' ref_ptr
 * arguments and dst_ptr / dst_pixels_per_line as their src_ptr arguments.
 */
unsigned int vp8_sub_pixel_variance16x8_ssse3(const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const unsigned char *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse)
{
    unsigned int sum_sq;
    int diff_sum;
    /* An offset of 4 selects the dedicated "half" kernels below. */
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);

    /* The four cases are mutually exclusive, so their order is free. */
    if (x_is_half && y_is_half)
    {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               8, &diff_sum, &sum_sq);
    }
    else if (x_is_half && yoffset == 0)
    {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          8, &diff_sum, &sum_sq);
    }
    else if (y_is_half && xoffset == 0)
    {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &diff_sum, &sum_sq);
    }
    else
    {
        /* Every other offset pair goes through the general kernel. */
        vp8_filter_block2d_bil_var_ssse3(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, xoffset, yoffset,
                                         &diff_sum, &sum_sq);
    }

    *sse = sum_sq;

    return sum_sq - ((diff_sum * diff_sum) >> 7);
}
166