1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include "variance.h"
13#include "filter.h"
14
15
/* Returns the sum of the squares of the 256 coefficients starting at
 * src_ptr. Each square is formed in int arithmetic and accumulated into an
 * unsigned int total. */
unsigned int vp8_get_mb_ss_c
(
    const short *src_ptr
)
{
    unsigned int total = 0;
    unsigned int idx;

    for (idx = 0; idx < 256; idx++)
    {
        const int coeff = src_ptr[idx];

        total += (coeff * coeff);
    }

    return total;
}
32
33
/* Gathers difference statistics between a w-by-h block at src_ptr and the
 * co-located block at ref_ptr.
 *
 *   *sum receives the signed sum of (src - ref) over the block.
 *   *sse receives the sum of the squared differences.
 *
 * Both outputs are cleared before accumulation starts. Rows are stepped
 * using source_stride and recon_stride respectively. */
static void variance(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    int  w,
    int  h,
    unsigned int *sse,
    int *sum)
{
    const unsigned char *src_row = src_ptr;
    const unsigned char *ref_row = ref_ptr;
    int row, col;

    *sum = 0;
    *sse = 0;

    for (row = 0; row < h; row++)
    {
        for (col = 0; col < w; col++)
        {
            const int delta = src_row[col] - ref_row[col];

            *sum += delta;
            *sse += delta * delta;
        }

        src_row += source_stride;
        ref_row += recon_stride;
    }
}
63
64
/* Variance of a 16x16 block against a reference block.
 *
 * Stores the sum of squared differences in *sse and returns that value
 * minus (sum * sum) >> 8, i.e. the squared difference sum divided by the
 * 256 pixels of the block. The square is formed in unsigned arithmetic. */
unsigned int vp8_variance16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &diff_sum);
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 8;

    *sse = sq_err;
    return (sq_err - mean_term);
}
80
/* Variance of an 8-wide, 16-high block against a reference block.
 *
 * Stores the sum of squared differences in *sse and returns that value
 * minus (sum * sum) >> 7, i.e. the squared difference sum divided by the
 * 128 pixels of the block. */
unsigned int vp8_variance8x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &sq_err, &diff_sum);
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;

    *sse = sq_err;
    return (sq_err - mean_term);
}
96
/* Variance of a 16-wide, 8-high block against a reference block.
 *
 * Stores the sum of squared differences in *sse and returns that value
 * minus (sum * sum) >> 7, i.e. the squared difference sum divided by the
 * 128 pixels of the block. */
unsigned int vp8_variance16x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &sq_err, &diff_sum);
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;

    *sse = sq_err;
    return (sq_err - mean_term);
}
112
113
/* Variance of an 8x8 block against a reference block.
 *
 * Stores the sum of squared differences in *sse and returns that value
 * minus (sum * sum) >> 6, i.e. the squared difference sum divided by the
 * 64 pixels of the block. */
unsigned int vp8_variance8x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &sq_err, &diff_sum);
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 6;

    *sse = sq_err;
    return (sq_err - mean_term);
}
129
/* Variance of a 4x4 block against a reference block.
 *
 * Stores the sum of squared differences in *sse and returns that value
 * minus (sum * sum) >> 4, i.e. the squared difference sum divided by the
 * 16 pixels of the block. */
unsigned int vp8_variance4x4_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    unsigned int mean_term;
    int diff_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &sq_err, &diff_sum);
    mean_term = ((unsigned int)diff_sum * diff_sum) >> 4;

    *sse = sq_err;
    return (sq_err - mean_term);
}
145
146
/* Sum of squared differences of a 16x16 block against a reference block.
 *
 * The same value is both stored in *sse and returned; no mean term is
 * subtracted. */
unsigned int vp8_mse16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;   /* required by variance(); not used here */

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &diff_sum);

    *sse = sq_err;
    return sq_err;
}
161
162
163/****************************************************************************
164 *
165 *  ROUTINE       : filter_block2d_bil_first_pass
166 *
167 *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
168 *                  UINT32 src_pixels_per_line : Stride of input block.
169 *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
170 *                  UINT32 output_height     : Input block height.
171 *                  UINT32 output_width      : Input block width.
172 *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
173 *
174 *  OUTPUTS       : INT32 *output_ptr        : Pointer to filtered block.
175 *
176 *  RETURNS       : void
177 *
178 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
179 *                  either horizontal or vertical direction to produce the
180 *                  filtered output block. Used to implement first-pass
181 *                  of 2-D separable filter.
182 *
183 *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
184 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
185 *                  pixel_step defines whether the filter is applied
186 *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
187 *                  It defines the offset required to move from one input
188 *                  to the next.
189 *
190 ****************************************************************************/
191static void var_filter_block2d_bil_first_pass
192(
193    const unsigned char *src_ptr,
194    unsigned short *output_ptr,
195    unsigned int src_pixels_per_line,
196    int pixel_step,
197    unsigned int output_height,
198    unsigned int output_width,
199    const short *vp8_filter
200)
201{
202    unsigned int i, j;
203
204    for (i = 0; i < output_height; i++)
205    {
206        for (j = 0; j < output_width; j++)
207        {
208            /* Apply bilinear filter */
209            output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
210                             ((int)src_ptr[pixel_step] * vp8_filter[1]) +
211                             (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
212            src_ptr++;
213        }
214
215        /* Next row... */
216        src_ptr    += src_pixels_per_line - output_width;
217        output_ptr += output_width;
218    }
219}
220
221/****************************************************************************
222 *
223 *  ROUTINE       : filter_block2d_bil_second_pass
224 *
225 *  INPUTS        : INT32  *src_ptr          : Pointer to source block.
226 *                  UINT32 src_pixels_per_line : Stride of input block.
227 *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
228 *                  UINT32 output_height     : Input block height.
229 *                  UINT32 output_width      : Input block width.
230 *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
231 *
232 *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
233 *
234 *  RETURNS       : void
235 *
236 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
237 *                  either horizontal or vertical direction to produce the
238 *                  filtered output block. Used to implement second-pass
239 *                  of 2-D separable filter.
240 *
241 *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
242 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
243 *                  pixel_step defines whether the filter is applied
244 *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
245 *                  It defines the offset required to move from one input
246 *                  to the next.
247 *
248 ****************************************************************************/
249static void var_filter_block2d_bil_second_pass
250(
251    const unsigned short *src_ptr,
252    unsigned char  *output_ptr,
253    unsigned int  src_pixels_per_line,
254    unsigned int  pixel_step,
255    unsigned int  output_height,
256    unsigned int  output_width,
257    const short *vp8_filter
258)
259{
260    unsigned int  i, j;
261    int  Temp;
262
263    for (i = 0; i < output_height; i++)
264    {
265        for (j = 0; j < output_width; j++)
266        {
267            /* Apply filter */
268            Temp = ((int)src_ptr[0]          * vp8_filter[0]) +
269                   ((int)src_ptr[pixel_step] * vp8_filter[1]) +
270                   (VP8_FILTER_WEIGHT / 2);
271            output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
272            src_ptr++;
273        }
274
275        /* Next row... */
276        src_ptr    += src_pixels_per_line - output_width;
277        output_ptr += output_width;
278    }
279}
280
281
282unsigned int vp8_sub_pixel_variance4x4_c
283(
284    const unsigned char  *src_ptr,
285    int  src_pixels_per_line,
286    int  xoffset,
287    int  yoffset,
288    const unsigned char *dst_ptr,
289    int dst_pixels_per_line,
290    unsigned int *sse
291)
292{
293    unsigned char  temp2[20*16];
294    const short *HFilter, *VFilter;
295    unsigned short FData3[5*4]; /* Temp data bufffer used in filtering */
296
297    HFilter = vp8_bilinear_filters[xoffset];
298    VFilter = vp8_bilinear_filters[yoffset];
299
300    /* First filter 1d Horizontal */
301    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
302
303    /* Now filter Verticaly */
304    var_filter_block2d_bil_second_pass(FData3, temp2, 4,  4,  4,  4, VFilter);
305
306    return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
307}
308
309
310unsigned int vp8_sub_pixel_variance8x8_c
311(
312    const unsigned char  *src_ptr,
313    int  src_pixels_per_line,
314    int  xoffset,
315    int  yoffset,
316    const unsigned char *dst_ptr,
317    int dst_pixels_per_line,
318    unsigned int *sse
319)
320{
321    unsigned short FData3[9*8]; /* Temp data bufffer used in filtering */
322    unsigned char  temp2[20*16];
323    const short *HFilter, *VFilter;
324
325    HFilter = vp8_bilinear_filters[xoffset];
326    VFilter = vp8_bilinear_filters[yoffset];
327
328    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
329    var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
330
331    return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
332}
333
334unsigned int vp8_sub_pixel_variance16x16_c
335(
336    const unsigned char  *src_ptr,
337    int  src_pixels_per_line,
338    int  xoffset,
339    int  yoffset,
340    const unsigned char *dst_ptr,
341    int dst_pixels_per_line,
342    unsigned int *sse
343)
344{
345    unsigned short FData3[17*16];   /* Temp data bufffer used in filtering */
346    unsigned char  temp2[20*16];
347    const short *HFilter, *VFilter;
348
349    HFilter = vp8_bilinear_filters[xoffset];
350    VFilter = vp8_bilinear_filters[yoffset];
351
352    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
353    var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
354
355    return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
356}
357
358
/* 16x16 sub-pixel variance with a fixed horizontal offset of 4 and no
 * vertical offset; forwards to vp8_sub_pixel_variance16x16_c.
 * NOTE(review): offset 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters, as the function name suggests - confirm against
 * the table definition. */
unsigned int vp8_variance_halfpixvar16x16_h_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 0;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
369
370
/* 16x16 sub-pixel variance with no horizontal offset and a fixed vertical
 * offset of 4; forwards to vp8_sub_pixel_variance16x16_c.
 * NOTE(review): offset 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters, as the function name suggests - confirm against
 * the table definition. */
unsigned int vp8_variance_halfpixvar16x16_v_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 0;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
381
382
/* 16x16 sub-pixel variance with fixed horizontal and vertical offsets of 4;
 * forwards to vp8_sub_pixel_variance16x16_c.
 * NOTE(review): offset 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters, as the function name suggests - confirm against
 * the table definition. */
unsigned int vp8_variance_halfpixvar16x16_hv_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int x_off = 4;
    const int y_off = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, x_off, y_off,
                                         ref_ptr, recon_stride, sse);
}
393
394
/* Sum of squared differences between a bilinearly filtered 16x16 source
 * block and dst_ptr.
 *
 * Runs vp8_sub_pixel_variance16x16_c for its side effect of filling *sse,
 * discards the variance it returns, and returns *sse instead. */
unsigned int vp8_sub_pixel_mse16x16_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    (void)vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                        xoffset, yoffset,
                                        dst_ptr, dst_pixels_per_line, sse);

    return *sse;
}
409
410unsigned int vp8_sub_pixel_variance16x8_c
411(
412    const unsigned char  *src_ptr,
413    int  src_pixels_per_line,
414    int  xoffset,
415    int  yoffset,
416    const unsigned char *dst_ptr,
417    int dst_pixels_per_line,
418    unsigned int *sse
419)
420{
421    unsigned short FData3[16*9];    /* Temp data bufffer used in filtering */
422    unsigned char  temp2[20*16];
423    const short *HFilter, *VFilter;
424
425    HFilter = vp8_bilinear_filters[xoffset];
426    VFilter = vp8_bilinear_filters[yoffset];
427
428    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
429    var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
430
431    return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
432}
433
434unsigned int vp8_sub_pixel_variance8x16_c
435(
436    const unsigned char  *src_ptr,
437    int  src_pixels_per_line,
438    int  xoffset,
439    int  yoffset,
440    const unsigned char *dst_ptr,
441    int dst_pixels_per_line,
442    unsigned int *sse
443)
444{
445    unsigned short FData3[9*16];    /* Temp data bufffer used in filtering */
446    unsigned char  temp2[20*16];
447    const short *HFilter, *VFilter;
448
449
450    HFilter = vp8_bilinear_filters[xoffset];
451    VFilter = vp8_bilinear_filters[yoffset];
452
453
454    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
455    var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
456
457    return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
458}
459