1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include "variance.h"
13#include "vp8/common/filter.h"
14
15
/* Returns the sum of the squares of the 256 consecutive values starting at
 * src_ptr.  Exactly 256 entries are always read.  Each square is computed
 * as an int and accumulated into an unsigned int total. */
unsigned int vp8_get_mb_ss_c(const short *src_ptr)
{
    const short *const stop = src_ptr + 256;
    unsigned int total = 0;

    for (; src_ptr != stop; ++src_ptr)
    {
        total += (*src_ptr) * (*src_ptr);
    }

    return total;
}
32
33
/* Walks a w x h block of src_ptr and ref_ptr (each with its own row stride)
 * and reports two totals over all pixel pairs:
 *   *sum : sum of (src - ref)
 *   *sse : sum of (src - ref)^2
 * Both outputs are written even when the block is empty (they are then 0). */
static void variance(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    int  w,
    int  h,
    unsigned int *sse,
    int *sum)
{
    const unsigned char *src_row = src_ptr;
    const unsigned char *ref_row = ref_ptr;
    unsigned int sq_total = 0;
    int diff_total = 0;
    int row = 0;

    while (row < h)
    {
        int col = 0;

        while (col < w)
        {
            const int d = src_row[col] - ref_row[col];

            diff_total += d;
            sq_total += d * d;
            ++col;
        }

        /* Step each plane to its next row using its own stride. */
        src_row += source_stride;
        ref_row += recon_stride;
        ++row;
    }

    *sum = diff_total;
    *sse = sq_total;
}
63
/* 8x8 block statistics.
 * Fills *SSE with the sum of squared differences and *Sum with the sum of
 * differences between the source and reference blocks (via variance()),
 * and returns SSE - (Sum^2 >> 6); the shift by 6 divides by the 64 pixels
 * of the block. */
unsigned int vp8_get8x8var_c(const unsigned char *src_ptr, int source_stride,
                             const unsigned char *ref_ptr, int recon_stride,
                             unsigned int *SSE, int *Sum)
{
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);

    mean_term = ((*Sum) * (*Sum)) >> 6;
    return *SSE - mean_term;
}
79
/* 16x16 block statistics.
 * Fills *SSE with the sum of squared differences and *Sum with the sum of
 * differences between the source and reference blocks (via variance()),
 * and returns SSE - (Sum^2 >> 8); the shift by 8 divides by the 256 pixels
 * of the block. */
unsigned int
vp8_get16x16var_c
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
)
{
    unsigned int sum_squared;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);

    /* |Sum| can reach 256 * 255 = 65280, and 65280^2 = 4261478400 does not
     * fit in a signed 32-bit int (signed overflow is undefined behaviour).
     * The square does fit in a 32-bit unsigned int, and unsigned
     * multiplication yields the correct value for negative sums too. */
    sum_squared = (unsigned int)(*Sum) * (unsigned int)(*Sum);

    return (*SSE - (sum_squared >> 8));
}
96
97
98
/* Variance of the 16x16 difference block (src - ref).
 * Stores the sum of squared differences in *sse and returns
 * SSE - (sum^2 >> 8); the shift by 8 divides by the 256 pixels. */
unsigned int vp8_variance16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;        /* sum of squared differences */
    int diff_sum;               /* sum of differences         */
    unsigned int sum_squared;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &diff_sum);
    *sse = sq_err;

    /* |diff_sum| can reach 256 * 255 = 65280, and 65280^2 = 4261478400
     * overflows a signed 32-bit int (undefined behaviour).  Squaring in
     * unsigned arithmetic is well defined and the result fits in 32 bits,
     * including when diff_sum is negative. */
    sum_squared = (unsigned int)diff_sum * (unsigned int)diff_sum;

    return (sq_err - (sum_squared >> 8));
}
114
/* Variance of the difference block that is 8 pixels wide and 16 rows tall.
 * Stores the sum of squared differences in *sse and returns
 * SSE - (sum^2 >> 7); the shift by 7 divides by the 128 pixels. */
unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride,
                                const unsigned char *ref_ptr, int recon_stride,
                                unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &sq_err, &diff_sum);

    mean_term = (diff_sum * diff_sum) >> 7;
    *sse = sq_err;
    return sq_err - mean_term;
}
130
/* Variance of the difference block that is 16 pixels wide and 8 rows tall.
 * Stores the sum of squared differences in *sse and returns
 * SSE - (sum^2 >> 7); the shift by 7 divides by the 128 pixels. */
unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride,
                                const unsigned char *ref_ptr, int recon_stride,
                                unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &sq_err, &diff_sum);

    mean_term = (diff_sum * diff_sum) >> 7;
    *sse = sq_err;
    return sq_err - mean_term;
}
146
147
/* Variance of the 8x8 difference block (src - ref).
 * Stores the sum of squared differences in *sse and returns
 * SSE - (sum^2 >> 6); the shift by 6 divides by the 64 pixels. */
unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride,
                               const unsigned char *ref_ptr, int recon_stride,
                               unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &sq_err, &diff_sum);

    mean_term = (diff_sum * diff_sum) >> 6;
    *sse = sq_err;
    return sq_err - mean_term;
}
163
/* Variance of the 4x4 difference block (src - ref).
 * Stores the sum of squared differences in *sse and returns
 * SSE - (sum^2 >> 4); the shift by 4 divides by the 16 pixels. */
unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride,
                               const unsigned char *ref_ptr, int recon_stride,
                               unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &sq_err, &diff_sum);

    mean_term = (diff_sum * diff_sum) >> 4;
    *sse = sq_err;
    return sq_err - mean_term;
}
179
180
/* Sum of squared differences over a 16x16 block.
 * The same value is stored in *sse and returned; the sum of differences
 * that variance() also produces is discarded. */
unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride,
                            const unsigned char *ref_ptr, int recon_stride,
                            unsigned int *sse)
{
    unsigned int sq_err;
    int unused_sum;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &unused_sum);

    *sse = sq_err;
    return sq_err;
}
195
196
197/****************************************************************************
198 *
199 *  ROUTINE       : filter_block2d_bil_first_pass
200 *
201 *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
202 *                  UINT32 src_pixels_per_line : Stride of input block.
203 *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
204 *                  UINT32 output_height     : Input block height.
205 *                  UINT32 output_width      : Input block width.
206 *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
207 *
208 *  OUTPUTS       : INT32 *output_ptr        : Pointer to filtered block.
209 *
210 *  RETURNS       : void
211 *
212 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
213 *                  either horizontal or vertical direction to produce the
214 *                  filtered output block. Used to implement first-pass
215 *                  of 2-D separable filter.
216 *
217 *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
218 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
219 *                  pixel_step defines whether the filter is applied
220 *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
221 *                  It defines the offset required to move from one input
222 *                  to the next.
223 *
224 ****************************************************************************/
225static void var_filter_block2d_bil_first_pass
226(
227    const unsigned char *src_ptr,
228    unsigned short *output_ptr,
229    unsigned int src_pixels_per_line,
230    int pixel_step,
231    unsigned int output_height,
232    unsigned int output_width,
233    const short *vp8_filter
234)
235{
236    unsigned int i, j;
237
238    for (i = 0; i < output_height; i++)
239    {
240        for (j = 0; j < output_width; j++)
241        {
242            // Apply bilinear filter
243            output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
244                             ((int)src_ptr[pixel_step] * vp8_filter[1]) +
245                             (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
246            src_ptr++;
247        }
248
249        // Next row...
250        src_ptr    += src_pixels_per_line - output_width;
251        output_ptr += output_width;
252    }
253}
254
255/****************************************************************************
256 *
257 *  ROUTINE       : filter_block2d_bil_second_pass
258 *
259 *  INPUTS        : INT32  *src_ptr          : Pointer to source block.
260 *                  UINT32 src_pixels_per_line : Stride of input block.
261 *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
262 *                  UINT32 output_height     : Input block height.
263 *                  UINT32 output_width      : Input block width.
264 *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
265 *
266 *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
267 *
268 *  RETURNS       : void
269 *
270 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
271 *                  either horizontal or vertical direction to produce the
272 *                  filtered output block. Used to implement second-pass
273 *                  of 2-D separable filter.
274 *
275 *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
276 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
277 *                  pixel_step defines whether the filter is applied
278 *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
279 *                  It defines the offset required to move from one input
280 *                  to the next.
281 *
282 ****************************************************************************/
283static void var_filter_block2d_bil_second_pass
284(
285    const unsigned short *src_ptr,
286    unsigned char  *output_ptr,
287    unsigned int  src_pixels_per_line,
288    unsigned int  pixel_step,
289    unsigned int  output_height,
290    unsigned int  output_width,
291    const short *vp8_filter
292)
293{
294    unsigned int  i, j;
295    int  Temp;
296
297    for (i = 0; i < output_height; i++)
298    {
299        for (j = 0; j < output_width; j++)
300        {
301            // Apply filter
302            Temp = ((int)src_ptr[0]         * vp8_filter[0]) +
303                   ((int)src_ptr[pixel_step] * vp8_filter[1]) +
304                   (VP8_FILTER_WEIGHT / 2);
305            output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
306            src_ptr++;
307        }
308
309        // Next row...
310        src_ptr    += src_pixels_per_line - output_width;
311        output_ptr += output_width;
312    }
313}
314
315
316unsigned int vp8_sub_pixel_variance4x4_c
317(
318    const unsigned char  *src_ptr,
319    int  src_pixels_per_line,
320    int  xoffset,
321    int  yoffset,
322    const unsigned char *dst_ptr,
323    int dst_pixels_per_line,
324    unsigned int *sse
325)
326{
327    unsigned char  temp2[20*16];
328    const short *HFilter, *VFilter;
329    unsigned short FData3[5*4]; // Temp data bufffer used in filtering
330
331    HFilter = vp8_bilinear_filters[xoffset];
332    VFilter = vp8_bilinear_filters[yoffset];
333
334    // First filter 1d Horizontal
335    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
336
337    // Now filter Verticaly
338    var_filter_block2d_bil_second_pass(FData3, temp2, 4,  4,  4,  4, VFilter);
339
340    return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
341}
342
343
344unsigned int vp8_sub_pixel_variance8x8_c
345(
346    const unsigned char  *src_ptr,
347    int  src_pixels_per_line,
348    int  xoffset,
349    int  yoffset,
350    const unsigned char *dst_ptr,
351    int dst_pixels_per_line,
352    unsigned int *sse
353)
354{
355    unsigned short FData3[9*8]; // Temp data bufffer used in filtering
356    unsigned char  temp2[20*16];
357    const short *HFilter, *VFilter;
358
359    HFilter = vp8_bilinear_filters[xoffset];
360    VFilter = vp8_bilinear_filters[yoffset];
361
362    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
363    var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
364
365    return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
366}
367
368unsigned int vp8_sub_pixel_variance16x16_c
369(
370    const unsigned char  *src_ptr,
371    int  src_pixels_per_line,
372    int  xoffset,
373    int  yoffset,
374    const unsigned char *dst_ptr,
375    int dst_pixels_per_line,
376    unsigned int *sse
377)
378{
379    unsigned short FData3[17*16];   // Temp data bufffer used in filtering
380    unsigned char  temp2[20*16];
381    const short *HFilter, *VFilter;
382
383    HFilter = vp8_bilinear_filters[xoffset];
384    VFilter = vp8_bilinear_filters[yoffset];
385
386    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
387    var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
388
389    return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
390}
391
392
/* 16x16 sub-pixel variance with a fixed horizontal offset index of 4 and a
 * vertical offset index of 0.
 * NOTE(review): index 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters; the table is not defined in this file - confirm. */
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr,
                                              int source_stride,
                                              const unsigned char *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse)
{
    const int xoffset = 4;
    const int yoffset = 0;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                         xoffset, yoffset,
                                         ref_ptr, recon_stride, sse);
}
403
404
/* 16x16 sub-pixel variance with a horizontal offset index of 0 and a fixed
 * vertical offset index of 4.
 * NOTE(review): index 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters; the table is not defined in this file - confirm. */
unsigned int vp8_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr,
                                              int source_stride,
                                              const unsigned char *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse)
{
    const int xoffset = 0;
    const int yoffset = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                         xoffset, yoffset,
                                         ref_ptr, recon_stride, sse);
}
415
416
/* 16x16 sub-pixel variance with both the horizontal and the vertical offset
 * index fixed at 4.
 * NOTE(review): index 4 is presumably the half-pixel entry of
 * vp8_bilinear_filters; the table is not defined in this file - confirm. */
unsigned int vp8_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr,
                                               int source_stride,
                                               const unsigned char *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse)
{
    const int xoffset = 4;
    const int yoffset = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride,
                                         xoffset, yoffset,
                                         ref_ptr, recon_stride, sse);
}
427
428
/* Sub-pixel sum of squared differences for a 16x16 block.
 * Runs vp8_sub_pixel_variance16x16_c() only for the value it stores in
 * *sse; the variance it returns is ignored and *sse is returned instead. */
unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const unsigned char *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse)
{
    (void)vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                        xoffset, yoffset,
                                        dst_ptr, dst_pixels_per_line, sse);

    return *sse;
}
443
444unsigned int vp8_sub_pixel_variance16x8_c
445(
446    const unsigned char  *src_ptr,
447    int  src_pixels_per_line,
448    int  xoffset,
449    int  yoffset,
450    const unsigned char *dst_ptr,
451    int dst_pixels_per_line,
452    unsigned int *sse
453)
454{
455    unsigned short FData3[16*9];    // Temp data bufffer used in filtering
456    unsigned char  temp2[20*16];
457    const short *HFilter, *VFilter;
458
459    HFilter = vp8_bilinear_filters[xoffset];
460    VFilter = vp8_bilinear_filters[yoffset];
461
462    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
463    var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
464
465    return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
466}
467
468unsigned int vp8_sub_pixel_variance8x16_c
469(
470    const unsigned char  *src_ptr,
471    int  src_pixels_per_line,
472    int  xoffset,
473    int  yoffset,
474    const unsigned char *dst_ptr,
475    int dst_pixels_per_line,
476    unsigned int *sse
477)
478{
479    unsigned short FData3[9*16];    // Temp data bufffer used in filtering
480    unsigned char  temp2[20*16];
481    const short *HFilter, *VFilter;
482
483
484    HFilter = vp8_bilinear_filters[xoffset];
485    VFilter = vp8_bilinear_filters[yoffset];
486
487
488    var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
489    var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
490
491    return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
492}
493